[PATCH v4] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
# HG changeset patch # User Maciej Fijalkowski# Date 1473680234 -7200 # Mon Sep 12 13:37:14 2016 +0200 # Node ID c770219dc4c253d7cd82519ce3c74438bb2829d3 # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 lazymanifest: write a more efficient, pypy friendly version of lazymanifest diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -104,69 +104,300 @@ _checkforbidden(files) return ''.join(lines) -class _lazymanifest(dict): -"""This is the pure implementation of lazymanifest. - -It has not been optimized *at all* and is not lazy. -""" - -def __init__(self, data): -dict.__init__(self) -for f, n, fl in _parse(data): -self[f] = n, fl - -def __setitem__(self, k, v): -node, flag = v -assert node is not None -if len(node) > 21: -node = node[:21] # match c implementation behavior -dict.__setitem__(self, k, (node, flag)) +class lazymanifestiter(object): +def __init__(self, lm): +self.pos = 0 +self.lm = lm def __iter__(self): -return iter(sorted(dict.keys(self))) +return self -def iterkeys(self): -return iter(sorted(dict.keys(self))) +def next(self): +try: +data, pos = self.lm._get(self.pos) +except IndexError: +raise StopIteration +if pos == -1: +self.pos += 1 +return data[0] +self.pos += 1 +zeropos = data.find('\x00', pos) +return data[pos:zeropos] -def iterentries(self): -return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) +class lazymanifestiterentries(object): +def __init__(self, lm): +self.lm = lm +self.pos = 0 + +def __iter__(self): +return self + +def next(self): +try: +data, pos = self.lm._get(self.pos) +except IndexError: +raise StopIteration +if pos == -1: +self.pos += 1 +return data +zeropos = data.find('\x00', pos) +hashval = unhexlify(data, self.lm.extrainfo[self.pos], +zeropos + 1, 40) +flags = self.lm._getflags(data, self.pos, zeropos) +self.pos += 1 +return (data[pos:zeropos], hashval, flags) + +def unhexlify(data, extra, pos, length): +s = data[pos:pos + length].decode('hex') +if 
extra: +s += chr(extra & 0xff) +return s + +def _cmp(a, b): +return (a > b) - (a < b) + +class _lazymanifest(object): +def __init__(self, data, positions=None, extrainfo=None, extradata=None): +if positions is None: +self.positions = self.findlines(data) +self.extrainfo = [0] * len(self.positions) +self.data = data +self.extradata = [] +else: +self.positions = positions[:] +self.extrainfo = extrainfo[:] +self.extradata = extradata[:] +self.data = data + +def findlines(self, data): +if not data: +return [] +pos = data.find("\n") +if pos == -1 or data[-1] != '\n': +raise ValueError("Manifest did not end in a newline.") +positions = [0] +prev = data[:data.find('\x00')] +while pos < len(data) - 1 and pos != -1: +positions.append(pos + 1) +nexts = data[pos + 1:data.find('\x00', pos + 1)] +if nexts < prev: +raise ValueError("Manifest lines not in sorted order.") +prev = nexts +pos = data.find("\n", pos + 1) +return positions + +def _get(self, index): +# get the position encoded in pos: +# positive number is an index in 'data' +# negative number is in extrapieces +pos = self.positions[index] +if pos >= 0: +return self.data, pos +return self.extradata[-pos - 1], -1 + +def _getkey(self, pos): +if pos >= 0: +return self.data[pos:self.data.find('\x00', pos + 1)] +return self.extradata[-pos - 1][0] + +def bsearch(self, key): +first = 0 +last = len(self.positions) - 1 + +while first <= last: +midpoint = (first + last)//2 +nextpos = self.positions[midpoint] +candidate = self._getkey(nextpos) +r = _cmp(key, candidate) +if r == 0: +return midpoint +else: +if r < 0: +last = midpoint - 1 +else: +first = midpoint + 1 +return -1 + +def bsearch2(self, key): +# same as the above, but will always return the position +# done for performance reasons +first = 0 +last = len(self.positions) - 1 + +while first <= last: +midpoint = (first + last)//2 +nextpos
Re: [PATCH v3] lazymanifest: write a more efficient, pypy friendly version of lazymanifest
Fixed, it's error reporting and making sure we truncate the 22 length hash On Fri, Sep 30, 2016 at 12:06 AM, Augie Facklerwrote: > On Wed, Sep 28, 2016 at 01:47:32PM +0200, Maciej Fijalkowski wrote: >> # HG changeset patch >> # User Maciej Fijalkowski >> # Date 1473680234 -7200 >> # Mon Sep 12 13:37:14 2016 +0200 >> # Node ID 2c852d298fbf87c5bc0ad7b65563212169915ab3 >> # Parent df05c43bd1e64f1620d0b2e502f4603c1e5a8341 >> lazymanifest: write a more efficient, pypy friendly version of lazymanifest > > (cd tests && pypy run-tests.py test-manifest.py ) > > --- > /usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py.out > +++ > /usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py.err > @@ -0,0 +1,24 @@ > +FAIL: testNoNewLineAtAll (__main__.testmanifestdict) > + > +Traceback (most recent call last): > + File > "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", > line 344, in testNoNewLineAtAll > +self.fail('Should have raised ValueError') > +AssertionError: Should have raised ValueError > +FAIL: testNoTerminalNewline (__main__.testmanifestdict) > + > +Traceback (most recent call last): > + File > "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", > line 337, in testNoTerminalNewline > +self.fail('Should have raised ValueError') > +AssertionError: Should have raised ValueError > +FAIL: testReversedLines (__main__.testmanifestdict) > + > +Traceback (most recent call last): > + File > "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", > line 330, in testReversedLines > +self.fail('Should have raised ValueError') > +AssertionError: Should have raised ValueError > +FAIL: testSetGetNodeSuffix (__main__.testmanifestdict) > + > +Traceback (most recent call last): > + File > "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", > line 233, in testSetGetNodeSuffix > +self.assertEqual(want, m['foo']) > +AssertionError: > 
'\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11a' > != > '\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11a+' > > ERROR: test-manifest.py output changed > > Can you take a look? I'm not immediately sure what's going on, but that's a > mostly-standard unittest so it should be pretty straightforward to try and > debug? ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: news from the topic experiment
On Thu, Sep 22, 2016 at 5:26 PM, Augie Fackler wrote: > > > On Sep 22, 2016, at 8:18 PM, Pierre-Yves David < > pierre-yves.da...@ens-lyon.org> wrote: > > > >>> I assume this is along the spirit of your 'hg undo' for evolve (that > >>> preserves the hash)? > >> > >> No. We are thinking about using topic to replace bookmark as the > recommended > >> workflow at fb. People can get confused if local bookmarks point to > public > >> changesets. > > > > I would be happy to discuss that ;-) > > As would I. I’ve fielded several user complaints lately that boil down to > wanting an ephemeral, user-selected name for each draft commit (a la what > we had with mq patch names). I suspect topics might be able to fill that > niche as well. > Many long-time Mercurial users at Mozilla still use MQ. Having talked to many of them, many of their arguments for using MQ all have a common theme: "it's simpler." Specifically: * `hg qseries` provides a concise view of all their "in progress" work * Individual commits have names which can be easily referenced - not some random 12 character hexadecimal value * The complexities of managing many active heads are hidden from them (rebase, graft, log -G, etc) * They like that they can edit patch files in .hg/patches if the VCS gets in the way It's worth noting that the Firefox repository has N heads spread out over M repositories. The Nightly head is in a different repository from the Aurora head from the Beta head, etc. While each repository shares the same root changeset and can be pulled into a "unified" repository with multiple heads, many developers only clone/pull a single repo/head. This means their local repo only has a single head by default. And if you use MQ, your local clone continues to only have a single head. It is important to remember that there is a significant complexity jump from a single head to multiple heads. 
Once you introduce multiple heads, you need to understand: * The concept of a DAG * How to find multiple heads * How to figure out which head you are on * How to switch your working directory to a different head * How to copy/move changesets between heads (graft, rebase, etc) * How to combine multiple heads (merging) * How to push only what you want to push Mercurial makes many of these things difficult. For example: * `hg log` shows changesets from multiple heads without any indication they are from multiple heads. Contrast with `hg log -G` or `hg log -f`. * Existing feature development methods except for MQ (branches and bookmarks) lack a command that concisely lists *all* of "my" commits (`hg branches` and `hg bookmarks` only list the tip-most changeset and don't show the changeset author or description by default). * `hg rebase` must be explicitly enabled. Some users think this means they shouldn't be using it. * `hg push` pushes all heads by default. Great if you are backing up your work to a non-publishing repo. Bad if you are trying to land something to the canonical repo. * `hg update` and `hg pull -u`'s heuristic based model can result in surprises * No easy way to figure out how the current working directory revision relates to the overall repo/DAG Many of us have installed extensions, aliases, etc on our own machines or have rolled these out to our users to pave over these deficiencies. That works... where you have control to do that. It doesn't generally work in open source: people are at the whim of what Mercurial supports out of the box. Many of us also understand the concepts of distributed version control - what the DAG is, how to interact with it, etc. It is really easy to lose sight of the fact that most users simply want to get stuff done and they don't care about the complexities. They will learn the minimum number of commands required to accomplish what they set out to do. A version control tool is a barrier to them getting actual work done. 
They will choose the mechanism that is simplest and fastest. Despite all of Mercurial's current deficiencies in this area, it's worth noting that I think it's still better off than Git. If the leap from single to multiple heads is hard, try introducing remote refs, tracking branches, detached HEAD, the reflog, the staging area, etc. And on top of that add a convoluted CLI that makes grasping the concepts difficult. Many of the hardcore MQ users at Mozilla detest Git because it is too complex. Only with a baptism of fire and likely a lot of hand holding do they warm up to Git. And - get this - once they learn the power of the DAG, of rebasing and interactive history editing, they fall in love. They think Mercurial's MQ model is primitive. Then I show them how to do "Git like" multiple head development in Mercurial and they're like "oh, this is basically the same except there aren't the complexities of understanding refs, the staging area, etc - this is quite nice." So, topics. I desperately want topics to be a compelling replacement for die hard MQ users. That means
[PATCH] annotate: calculate line count correctly
# HG changeset patch # User Jun Wu # Date 1475327938 -3600 # Sat Oct 01 14:18:58 2016 +0100 # Node ID dfd539e1e012e2fa78c0635e0e4bc993f7bbd89e # Parent 3741a8f86e88702595c29f8ed824a28da0cfa961 # Available At https://bitbucket.org/quark-zju/hg-draft # hg pull https://bitbucket.org/quark-zju/hg-draft -r dfd539e1e012 # EXP-Topic extensions.debug annotate: calculate line count correctly Before this patch, the "lines" function inside "annotate" returns 1 for empty text (''). This patch makes it 0, because the function should match mdiff.splitnewlines (used by mdiff.allblocks), or s.splitlines (used at the end of the "annotate" method). Both len(mdiff.splitnewlines('')) and len(''.splitlines(True)) are 0. This issue was discovered while testing fastannotate [1]. I could not find a test case to reveal this issue. However in theory this could reduce memory usage a little bit, and avoid surprises when people are touching this area in the future. [1]: https://bitbucket.org/facebook/hg-experimental/commits/525b3b98e93a diff --git a/mercurial/context.py b/mercurial/context.py --- a/mercurial/context.py +++ b/mercurial/context.py @@ -931,5 +931,5 @@ class basefilectx(object): if text.endswith("\n"): return text.count("\n") -return text.count("\n") + 1 +return text.count("\n") + int(bool(text)) if linenumber: ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] annotate: calculate line count correctly
Excerpts from Jun Wu's message of 2016-10-01 14:20:05 +0100: > # EXP-Topic extensions.debug Sorry, I didn't notice the topic name. I have the topics extension enabled but didn't run the topic command, and the commit reuses the topic its parent has. This is also one of the reasons I dislike the current topic design - I think I should be able to commit on top of others' commits (in this case, @) without thinking about topic names. ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 2] revset: extract function that creates range set from computed revisions
# HG changeset patch # User Yuya Nishihara # Date 1475320308 -32400 # Sat Oct 01 20:11:48 2016 +0900 # Node ID 95ec9f99f4dc075caa28ca71580e913b35855d84 # Parent 3f4e1c033f40aaa8111de9b8212f05e8e09590aa revset: extract function that creates range set from computed revisions So we can pass m=0 to _makerangeset() even if revision 0 is hidden. Hidden revisions are filtered by spanset. diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -366,8 +366,9 @@ def rangeset(repo, subset, x, y, order): if not m or not n: return baseset() -m, n = m.first(), n.last() - +return _makerangeset(repo, subset, m.first(), n.last(), order) + +def _makerangeset(repo, subset, m, n, order): if m == n: r = baseset([m]) elif n == node.wdirrev: ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: news from the topic experiment
> * `hg rebase` must be explicitly enabled. Some users think this means they > shouldn't be using it. Yeah as someone who is learning Mercurial I agree, usually I/people think that we need extensions when we are doing something which is not normal/general. > * `hg push` pushes all heads by default. Great if you are backing up your > work to a non-publishing repo. Bad if you are trying to land something to > the canonical repo. > * `hg update` and `hg pull -u`'s heuristic based model can result in > surprises > * No easy way to figure out how the current working directory revision > relates to the overall repo/DAG > > Many of us have installed extensions, aliases, etc on our own machines or > have rolled these out to our users to pave over these deficiencies. That > works... where you have control to do that. It doesn't generally work in > open source: people are at the whim of what Mercurial supports out of the > box. > > Many of us also understand the concepts of distributed version control - > what the DAG is, how to interact with it, etc. It is really easy to lose > sight of the fact that most users simply want to get stuff done and they > don't care about the complexities. They will learn the minimum number of > commands required to accomplish what they set out to do. A version control > tool is a barrier to them getting actual work done. They will choose the > mechanism that is simplest and fastest. > > Despite all of Mercurial's current deficiencies in this area, it's worth > noting that I think it's still better off than Git. If the leap from single > to multiple heads is hard, try introducing remote refs, tracking branches, > detached HEAD, the reflog, the staging area, etc. And on top of that add a > convoluted CLI that makes grasping the concepts difficult. Many of the > hardcore MQ users at Mozilla detest Git because it is too complex. Only with > a baptism of fire and likely a lot of hand holding do they warm up to Git. 
> And - get this - once they learn the power of the DAG, of rebasing and > interactive history editing, they fall in love. They think Mercurial's MQ > model is primitive. Then I show them how to do "Git like" multiple head > development in Mercurial and they're like "oh, this is basically the same > except there aren't the complexities of understanding refs, the staging > area, etc - this is quite nice." > > So, topics. > > I desperately want topics to be a compelling replacement for die hard MQ > users. That means that topics needs to maintain the simplicity of MQ as much > as possible. If people start with a single head repository, topics needs to > be as simple to use as MQ is, even if multiple topics do introduce multiple > heads in the DAG. Yet topics should also provide the features needed by > advanced users - those who fully understand how the DAG works and how to > manipulate changesets within it. > > For the beginning user, Mercurial/topics needs to provide: > > * A mechanism to show all active lines of work (list the topics) (and > possibly how they relate to each other and the underlying repository) > * A mechanism to show the current line of work (and possibly how it related > to the overall repository) > * A mechanism to show/search for unfinished changesets > * A mechanism to provide a human friendly name to a changeset > > I think the existing topics experiment shows a ton of promise in these > areas. And as a bonus, the transition from a beginning topics user (1 > published head, 1 topic) to more advanced scenarios is much better than with > MQ because you don't have to transition to different commands (away from the > q* MQ commands): you simply add more advanced commands like "rebase" and > "merge" to your skills set without having to relearn anything. I also love > how topics "fade away" when changesets become published. That's such an > improvement over having to manually delete things later. 
Just as long as the > user knows when their topics disappear... > > There are still some areas for improving topics. > > 1) `hg log` still shows *all* changesets in the repository. This is We should limit the output to a certain number of changesets, and it would be better to have `hg log --all` to show all changesets. The same applies to `hg tags`. I think it's better to limit output to a certain number and have an --all flag, which is quite common. > confusing for users that don't want to be burdened with the complexity of > multiple heads. I'd like the behavior of `hg log` to only show ancestors - > and possibly descendants - of the working directory changeset when the > working directory changeset is unpublished and has a topic. If we show > descendants, we should delimit the current changeset somehow, possibly by > making -G or a mode like it the default in this scenario. Realistically, I > think `hg log` should behave like `hg log -f` and `hg log --children` should > supplement that with descendants and a marking of the wdir changeset. If > there is a branch point in the
Re: [PATCH] py3: have an utility function to return string
On Sun, 2 Oct 2016 06:36:35 +0530, Pulkit Goyal wrote: > Is encoding.encoding public or private. Can I convert it to unicode? No. It's read/written freely. We could cache a unicode variant internally if that matters, but we would need a setter function to invalidate the cache. % grep encoding.encoding **/*.py hgext/convert/convcmd.py:# tolocal() because the encoding.encoding convert() hgext/convert/convcmd.py:orig_encoding = encoding.encoding hgext/convert/convcmd.py:encoding.encoding = 'UTF-8' hgext/convert/cvs.py:self.encoding = encoding.encoding hgext/convert/gnuarch.py:self.encoding = encoding.encoding hgext/highlight/__init__.py:mt = ''.join(tmpl('mimetype', encoding=encoding.encoding)) hgext/highlight/__init__.py:mt = ''.join(tmpl('mimetype', encoding=encoding.encoding)) hgext/highlight/highlight.py:text = text.decode(encoding.encoding, 'replace') hgext/highlight/highlight.py:coloriter = (s.encode(encoding.encoding, 'replace') hgext/win32mbcs.py:By default, win32mbcs uses encoding.encoding decided by Mercurial. 
hgext/win32mbcs.py:_encoding = ui.config('win32mbcs', 'encoding', encoding.encoding) hgext/zeroconf/__init__.py:return name.encode(encoding.encoding) mercurial/commands.py:('', 'encoding', encoding.encoding, _('set the charset encoding'), mercurial/commands.py:('', 'encodingmode', encoding.encodingmode, mercurial/commands.py:fm.write('encoding', _("checking encoding (%s)...\n"), encoding.encoding) mercurial/commandserver.py:self.cresult.write(encoding.encoding) mercurial/commandserver.py:hellomsg += 'encoding: ' + encoding.encoding mercurial/dispatch.py:reason = reason.encode(encoding.encoding, 'replace') mercurial/dispatch.py:encoding.encoding = options["encoding"] mercurial/dispatch.py:encoding.encodingmode = options["encodingmode"] mercurial/encoding.py:>>> encoding.encoding = 'utf-8' mercurial/encoding.py:>>> t = u.encode(encoding.encoding) mercurial/hgweb/hgweb_mod.py:'encoding': encoding.encoding, mercurial/hgweb/hgweb_mod.py:encoding.encoding = rctx.config('web', 'encoding', encoding.encoding) mercurial/hgweb/hgweb_mod.py:ctype = tmpl('mimetype', encoding=encoding.encoding) mercurial/hgweb/hgwebdir_mod.py:encoding.encoding = self.ui.config('web', 'encoding', mercurial/hgweb/hgwebdir_mod.py: encoding.encoding) mercurial/hgweb/hgwebdir_mod.py:ctype = tmpl('mimetype', encoding=encoding.encoding) mercurial/hgweb/hgwebdir_mod.py:"encoding": encoding.encoding, mercurial/hgweb/webcommands.py:mt += '; charset="%s"' % encoding.encoding mercurial/i18n.py:_msgcache[message] = u.encode(encoding.encoding, "replace") mercurial/mail.py: encoding.encoding.lower(), 'utf-8'] mercurial/mail.py:for ics in (encoding.encoding, encoding.fallbackencoding): mercurial/mail.py:dom = dom.decode(encoding.encoding).encode('idna') mercurial/minirst.py:>>> encoding.encoding = 'latin1' mercurial/minirst.py:>>> encoding.encoding = 'shiftjis' mercurial/minirst.py:utext = text.decode(encoding.encoding) mercurial/minirst.py:return utext.encode(encoding.encoding) 
mercurial/templatefilters.py:uctext = unicode(text[start:], encoding.encoding) mercurial/templatefilters.py:yield (uctext[:w].encode(encoding.encoding), mercurial/templatefilters.py: uctext[w:].encode(encoding.encoding)) mercurial/templatefilters.py:text = unicode(text, encoding.encoding, 'replace') mercurial/util.py:line = line.decode(encoding.encoding, encoding.encodingmode) mercurial/util.py:initindent = initindent.decode(encoding.encoding, encoding.encodingmode) mercurial/util.py:hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode) mercurial/util.py:return wrapper.fill(line).encode(encoding.encoding) tests/test-context.py:encoding.encoding = enc ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
mercurial@30030: 2 new changesets (2 on stable)
2 new changesets (2 on stable) in mercurial: http://selenic.com/repo/hg//rev/269a5d121339 changeset: 30029:269a5d121339 branch: stable parent: 30010:149433e68974 user: Augie Fackler date: Sat Oct 01 15:00:18 2016 -0400 summary: Added tag 3.9.2 for changeset 149433e68974 http://selenic.com/repo/hg//rev/8d74027bd4e7 changeset: 30030:8d74027bd4e7 branch: stable tag: tip user: Augie Fackler date: Sat Oct 01 15:00:23 2016 -0400 summary: Added signature for changeset 149433e68974 -- Repository URL: http://selenic.com/repo/hg/ ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH] py3: have an utility function to return string
On Fri, Sep 16, 2016 at 7:16 PM, Yuya Nishiharawrote: > On Thu, 15 Sep 2016 23:59:59 +0530, Pulkit Goyal wrote: >> On Thu, Sep 15, 2016 at 7:06 PM, Yuya Nishihara wrote: >> > On Wed, 14 Sep 2016 22:45:27 +0530, Pulkit Goyal wrote: >> >> # HG changeset patch >> >> # User Pulkit Goyal <7895pul...@gmail.com> >> >> # Date 1473787789 -19800 >> >> # Tue Sep 13 22:59:49 2016 +0530 >> >> # Node ID ec133d50af780e84a6a24825b52d433c10f9cd55 >> >> # Parent 85bd31515225e7fdf9bd88edde054db2c74a33f8 >> >> py3: have an utility function to return string >> >> >> >> There are cases when we need strings and can't use bytes in python 3. >> >> We need an utility function for these cases. I agree that this may not >> >> be the best possible way out. I will be happy if anybody else can suggest >> >> a better approach. We need this functions for os.path.join(), >> > >> > We should stick to bytes for filesystem API, and translate bytes to unicode >> > at VFS layer as necessary. >> > >> > https://www.mercurial-scm.org/wiki/WindowsUTF8Plan >> > >> > (Also, we'll have to disable PEP 528 and 529 on Python 3.6, which will >> > break >> > existing repositories.) >> > >> > https://docs.python.org/3.6/whatsnew/3.6.html >> > >> >> __slots__ >> > >> > __slots__ can be considered private data, so just use u''. >> > >> >> and few more things. >> > >> > for instance? >> This function was motivated from Gregory's reply to >> https://www.mercurial-scm.org/pipermail/mercurial-devel/2016-August/086704.html >> , unfortunately I see that he replied to me only so I pasted it here >> https://bpaste.net/show/ab0d3ea39749 >> >> I am going through python documentation and there are things like >> __slots__, is_frozen() which accepts str in both py2 and py3. Since >> they are not same, I made this function to get help in such cases. If >> we can use unicodes in __slots__ in py2, than thats good. 
> > Python 2.6-2.7 accepts both str and unicode in general, but mixing them is > disaster so we've never used unicode whenever possible. Unfortunately, Python > 3 > solved that problem by forcing us to use unicode (named str) everywhere, which > doesn't work in Mercurial because we need to process binary data (including > unix paths) transparently. All inputs and outputs (except for future Windows > file API) should be bytes. > > So, if is_frozen() of Py3 doesn't take bytes and Py2 doesn't take unicode, > we'll need a compatibility function like you proposed. > >> >> +# This function converts its arguments to strings >> >> +# on the basis of python version. Strings in python 3 >> >> +# are unicodes and our transformer converts everything to bytes >> >> +# in python 3. So we need to decode it to unicodes in >> >> +# py3. >> >> + >> >> +def coverttostr(word): >> >> +if sys.version_info[0] < 3: >> >> +assert isinstance(word, str), "Not a string in Python 2" >> >> +return word >> >> +# Checking word is bytes because we have the transformer, else >> >> +# raising error >> >> +assert isinstance(word, bytes), "Should be bytes because of >> >> transformer" >> >> +return word.decode(sys.getfilesystemencoding()) >> > >> > Can we assume 'word' was encoded in file-system codec? >> >> Yeah because of the tranformer, we added b'' everywhere. > > As Martijn said, that varies on how 'word' was encoded. Python sources would > be latin1 or utf-8 in most cases, but a string read from external world is > different. We assume it as encoding.encoding. Is encoding.encoding public or private. Can I convert it to unicode? ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel