[PATCH v4] lazymanifest: write a more efficient, pypy friendly version of lazymanifest

2016-10-01 Thread Maciej Fijalkowski
# HG changeset patch
# User Maciej Fijalkowski 
# Date 1473680234 -7200
#  Mon Sep 12 13:37:14 2016 +0200
# Node ID c770219dc4c253d7cd82519ce3c74438bb2829d3
# Parent  df05c43bd1e64f1620d0b2e502f4603c1e5a8341
lazymanifest: write a more efficient, pypy friendly version of lazymanifest

diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -104,69 +104,300 @@
 _checkforbidden(files)
 return ''.join(lines)
 
-class _lazymanifest(dict):
-"""This is the pure implementation of lazymanifest.
-
-It has not been optimized *at all* and is not lazy.
-"""
-
-def __init__(self, data):
-dict.__init__(self)
-for f, n, fl in _parse(data):
-self[f] = n, fl
-
-def __setitem__(self, k, v):
-node, flag = v
-assert node is not None
-if len(node) > 21:
-node = node[:21] # match c implementation behavior
-dict.__setitem__(self, k, (node, flag))
+class lazymanifestiter(object):
+def __init__(self, lm):
+self.pos = 0
+self.lm = lm
 
 def __iter__(self):
-return iter(sorted(dict.keys(self)))
+return self
 
-def iterkeys(self):
-return iter(sorted(dict.keys(self)))
+def next(self):
+try:
+data, pos = self.lm._get(self.pos)
+except IndexError:
+raise StopIteration
+if pos == -1:
+self.pos += 1
+return data[0]
+self.pos += 1
+zeropos = data.find('\x00', pos)
+return data[pos:zeropos]
 
-def iterentries(self):
-return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
+class lazymanifestiterentries(object):
+def __init__(self, lm):
+self.lm = lm
+self.pos = 0
+
+def __iter__(self):
+return self
+
+def next(self):
+try:
+data, pos = self.lm._get(self.pos)
+except IndexError:
+raise StopIteration
+if pos == -1:
+self.pos += 1
+return data
+zeropos = data.find('\x00', pos)
+hashval = unhexlify(data, self.lm.extrainfo[self.pos],
+zeropos + 1, 40)
+flags = self.lm._getflags(data, self.pos, zeropos)
+self.pos += 1
+return (data[pos:zeropos], hashval, flags)
+
+def unhexlify(data, extra, pos, length):
+s = data[pos:pos + length].decode('hex')
+if extra:
+s += chr(extra & 0xff)
+return s
+
+def _cmp(a, b):
+return (a > b) - (a < b)
+
+class _lazymanifest(object):
+def __init__(self, data, positions=None, extrainfo=None, extradata=None):
+if positions is None:
+self.positions = self.findlines(data)
+self.extrainfo = [0] * len(self.positions)
+self.data = data
+self.extradata = []
+else:
+self.positions = positions[:]
+self.extrainfo = extrainfo[:]
+self.extradata = extradata[:]
+self.data = data
+
+def findlines(self, data):
+if not data:
+return []
+pos = data.find("\n")
+if pos == -1 or data[-1] != '\n':
+raise ValueError("Manifest did not end in a newline.")
+positions = [0]
+prev = data[:data.find('\x00')]
+while pos < len(data) - 1 and pos != -1:
+positions.append(pos + 1)
+nexts = data[pos + 1:data.find('\x00', pos + 1)]
+if nexts < prev:
+raise ValueError("Manifest lines not in sorted order.")
+prev = nexts
+pos = data.find("\n", pos + 1)
+return positions
+
+def _get(self, index):
+# get the position encoded in pos:
+#   positive number is an index in 'data'
+#   negative number is in extrapieces
+pos = self.positions[index]
+if pos >= 0:
+return self.data, pos
+return self.extradata[-pos - 1], -1
+
+def _getkey(self, pos):
+if pos >= 0:
+return self.data[pos:self.data.find('\x00', pos + 1)]
+return self.extradata[-pos - 1][0]
+
+def bsearch(self, key):
+first = 0
+last = len(self.positions) - 1
+
+while first <= last:
+midpoint = (first + last)//2
+nextpos = self.positions[midpoint]
+candidate = self._getkey(nextpos)
+r = _cmp(key, candidate)
+if r == 0:
+return midpoint
+else:
+if r < 0:
+last = midpoint - 1
+else:
+first = midpoint + 1
+return -1
+
+def bsearch2(self, key):
+# same as the above, but will always return the position
+# done for performance reasons
+first = 0
+last = len(self.positions) - 1
+
+while first <= last:
+midpoint = (first + last)//2
+nextpos 

Re: [PATCH v3] lazymanifest: write a more efficient, pypy friendly version of lazymanifest

2016-10-01 Thread Maciej Fijalkowski
Fixed; the failures were about error reporting and making sure we truncate the
22-length hash.

On Fri, Sep 30, 2016 at 12:06 AM, Augie Fackler  wrote:
> On Wed, Sep 28, 2016 at 01:47:32PM +0200, Maciej Fijalkowski wrote:
>> # HG changeset patch
>> # User Maciej Fijalkowski 
>> # Date 1473680234 -7200
>> #  Mon Sep 12 13:37:14 2016 +0200
>> # Node ID 2c852d298fbf87c5bc0ad7b65563212169915ab3
>> # Parent  df05c43bd1e64f1620d0b2e502f4603c1e5a8341
>> lazymanifest: write a more efficient, pypy friendly version of lazymanifest
>
>  (cd tests && pypy run-tests.py test-manifest.py )
>
>  --- 
> /usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py.out
>  +++ 
> /usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py.err
>  @@ -0,0 +1,24 @@
>  +FAIL: testNoNewLineAtAll (__main__.testmanifestdict)
>  +
>  +Traceback (most recent call last):
>  +  File 
> "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", 
> line 344, in testNoNewLineAtAll
>  +self.fail('Should have raised ValueError')
>  +AssertionError: Should have raised ValueError
>  +FAIL: testNoTerminalNewline (__main__.testmanifestdict)
>  +
>  +Traceback (most recent call last):
>  +  File 
> "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", 
> line 337, in testNoTerminalNewline
>  +self.fail('Should have raised ValueError')
>  +AssertionError: Should have raised ValueError
>  +FAIL: testReversedLines (__main__.testmanifestdict)
>  +
>  +Traceback (most recent call last):
>  +  File 
> "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", 
> line 330, in testReversedLines
>  +self.fail('Should have raised ValueError')
>  +AssertionError: Should have raised ValueError
>  +FAIL: testSetGetNodeSuffix (__main__.testmanifestdict)
>  +
>  +Traceback (most recent call last):
>  +  File 
> "/usr/local/google/home/augie/Programming/hg/crew/tests/test-manifest.py", 
> line 233, in testSetGetNodeSuffix
>  +self.assertEqual(want, m['foo'])
>  +AssertionError: 
> '\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11a'
>  != 
> '\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11a+'
>
>  ERROR: test-manifest.py output changed
>
> Can you take a look? I'm not immediately sure what's going on, but that's a 
> mostly-standard unittest so it should be pretty straightforward to try and 
> debug?
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: news from the topic experiment

2016-10-01 Thread Gregory Szorc
On Thu, Sep 22, 2016 at 5:26 PM, Augie Fackler  wrote:

>
> > On Sep 22, 2016, at 8:18 PM, Pierre-Yves David <
> pierre-yves.da...@ens-lyon.org> wrote:
> >
> >>> I assume this is along the spirit of your 'hg undo' for evolve (that
> >>> preserves the hash)?
> >>
> >> No. We are thinking about using topic to replace bookmark as the
> recommended
> >> workflow at fb. People can get confused if local bookmarks point to
> public
> >> changesets.
> >
> > I would be happy to discuss that ;-)
>
> As would I. I’ve fielded several user complaints lately that boil down to
> wanting an ephemeral, user-selected name for each draft commit (a la what
> we had with mq patch names). I suspect topics might be able to fill that
> niche as well.
>

Many long-time Mercurial users at Mozilla still use MQ. Having talked to
many of them, many of their arguments for using MQ all have a common theme:
"it's simpler." Specifically:

* `hg qseries` provides a concise view of all their "in progress" work
* Individual commits have names which can be easily referenced - not some
random 12 character hexadecimal value
* The complexities of managing many active heads are hidden from them
(rebase, graft, log -G, etc)
* They like that they can edit patch files in .hg/patches if the VCS gets
in the way

It's worth noting that the Firefox repository has N heads spread out over M
repositories. The Nightly head is in a different repository from the Aurora
head from the Beta head, etc. While each repository shares the same root
changeset and can be pulled into a "unified" repository with multiple
heads, many developers only clone/pull a single repo/head. This means their
local repo only has a single head by default. And if you use MQ, your local
clone continues to only have a single head.

It is important to remember that there is a significant complexity jump
from a single head to multiple heads. Once you introduce multiple heads,
you need to understand:

* The concept of a DAG
* How to find multiple heads
* How to figure out which head you are on
* How to switch your working directory to a different head
* How to copy/move changesets between heads (graft, rebase, etc)
* How to combine multiple heads (merging)
* How to push only what you want to push

Mercurial makes many of these things difficult. For example:

* `hg log` shows changesets from multiple heads without any indication they
are from multiple heads. Contrast with `hg log -G` or `hg log -f`.
* Existing feature development methods except for MQ (branches and
bookmarks) lack a command that concisely lists *all* of "my" commits (`hg
branches` and `hg bookmarks` only list the tip-most changeset and don't
show the changeset author or description by default).
* `hg rebase` must be explicitly enabled. Some users think this means they
shouldn't be using it.
* `hg push` pushes all heads by default. Great if you are backing up your
work to a non-publishing repo. Bad if you are trying to land something to
the canonical repo.
* `hg update` and `hg pull -u`'s heuristic based model can result in
surprises
* No easy way to figure out how the current working directory revision
relates to the overall repo/DAG

Many of us have installed extensions, aliases, etc on our own machines or
have rolled these out to our users to pave over these deficiencies. That
works... where you have control to do that. It doesn't generally work in
open source: people are at the whim of what Mercurial supports out of the
box.

Many of us also understand the concepts of distributed version control -
what the DAG is, how to interact with it, etc. It is really easy to lose
sight of the fact that most users simply want to get stuff done and they
don't care about the complexities. They will learn the minimum number of
commands required to accomplish what they set out to do. A version control
tool is a barrier to them getting actual work done. They will choose the
mechanism that is simplest and fastest.

Despite all of Mercurial's current deficiencies in this area, it's worth
noting that I think it's still better off than Git. If the leap from single
to multiple heads is hard, try introducing remote refs, tracking branches,
detached HEAD, the reflog, the staging area, etc. And on top of that add a
convoluted CLI that makes grasping the concepts difficult. Many of the
hardcore MQ users at Mozilla detest Git because it is too complex. Only
with a baptism of fire and likely a lot of hand holding do they warm up to
Git. And - get this - once they learn the power of the DAG, of rebasing and
interactive history editing, they fall in love. They think Mercurial's MQ
model is primitive. Then I show them how to do "Git like" multiple head
development in Mercurial and they're like "oh, this is basically the same
except there aren't the complexities of understanding refs, the staging
area, etc - this is quite nice."

So, topics.

I desperately want topics to be a compelling replacement for die hard MQ
users. That means 

[PATCH] annotate: calculate line count correctly

2016-10-01 Thread Jun Wu
# HG changeset patch
# User Jun Wu 
# Date 1475327938 -3600
#  Sat Oct 01 14:18:58 2016 +0100
# Node ID dfd539e1e012e2fa78c0635e0e4bc993f7bbd89e
# Parent  3741a8f86e88702595c29f8ed824a28da0cfa961
# Available At https://bitbucket.org/quark-zju/hg-draft
#  hg pull https://bitbucket.org/quark-zju/hg-draft -r dfd539e1e012
# EXP-Topic extensions.debug
annotate: calculate line count correctly

Before this patch, the "lines" function inside "annotate" returns 1 for
empty text (''). This patch makes it return 0, because the function should
match mdiff.splitnewlines (used by mdiff.allblocks), or s.splitlines (used at
the end of the "annotate" method). Both len(mdiff.splitnewlines('')) and
len(''.splitlines(True)) are 0.

This issue was discovered while testing fastannotate [1].

I could not find a test case to reveal this issue. However, in theory this
could reduce memory usage a little bit, and it avoids surprises when people
are touching this area in the future.

[1]: https://bitbucket.org/facebook/hg-experimental/commits/525b3b98e93a

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -931,5 +931,5 @@ class basefilectx(object):
 if text.endswith("\n"):
 return text.count("\n")
-return text.count("\n") + 1
+return text.count("\n") + int(bool(text))
 
 if linenumber:
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH] annotate: calculate line count correctly

2016-10-01 Thread Jun Wu
Excerpts from Jun Wu's message of 2016-10-01 14:20:05 +0100:
> # EXP-Topic extensions.debug

Sorry, I didn't notice the topic name. I have the topics extension enabled but
didn't run the topic command, and the commit reused the topic of its parent.

This is also one of the reasons I dislike the current topic design - I think
I should be able to commit on top of others' commits (in this case, @)
without thinking about topic names.
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


[PATCH 1 of 2] revset: extract function that creates range set from computed revisions

2016-10-01 Thread Yuya Nishihara
# HG changeset patch
# User Yuya Nishihara 
# Date 1475320308 -32400
#  Sat Oct 01 20:11:48 2016 +0900
# Node ID 95ec9f99f4dc075caa28ca71580e913b35855d84
# Parent  3f4e1c033f40aaa8111de9b8212f05e8e09590aa
revset: extract function that creates range set from computed revisions

So we can pass m=0 to _makerangeset() even if the revision 0 is hidden.
Hidden revisions are filtered by spanset.

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -366,8 +366,9 @@ def rangeset(repo, subset, x, y, order):
 
 if not m or not n:
 return baseset()
-m, n = m.first(), n.last()
-
+return _makerangeset(repo, subset, m.first(), n.last(), order)
+
+def _makerangeset(repo, subset, m, n, order):
 if m == n:
 r = baseset([m])
 elif n == node.wdirrev:
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: news from the topic experiment

2016-10-01 Thread Pulkit Goyal
> * `hg rebase` must be explicitly enabled. Some users think this means they
> shouldn't be using it.

Yeah, as someone who is learning Mercurial, I agree. Usually I (and other
people) think that we need extensions when we are doing something which is not
normal/general.

> * `hg push` pushes all heads by default. Great if you are backing up your
> work to a non-publishing repo. Bad if you are trying to land something to
> the canonical repo.
> * `hg update` and `hg pull -u`'s heuristic based model can result in
> surprises
> * No easy way to figure out how the current working directory revision
> relates to the overall repo/DAG
>
> Many of us have installed extensions, aliases, etc on our own machines or
> have rolled these out to our users to pave over these deficiencies. That
> works... where you have control to do that. It doesn't generally work in
> open source: people are at the whim of what Mercurial supports out of the
> box.
>
> Many of us also understand the concepts of distributed version control -
> what the DAG is, how to interact with it, etc. It is really easy to lose
> sight of the fact that most users simply want to get stuff done and they
> don't care about the complexities. They will learn the minimum number of
> commands required to accomplish what they set out to do. A version control
> tool is a barrier to them getting actual work done. They will choose the
> mechanism that is simplest and fastest.
>
> Despite all of Mercurial's current deficiencies in this area, it's worth
> noting that I think it's still better off than Git. If the leap from single
> to multiple heads is hard, try introducing remote refs, tracking branches,
> detached HEAD, the reflog, the staging area, etc. And on top of that add a
> convoluted CLI that makes grasping the concepts difficult. Many of the
> hardcore MQ users at Mozilla detest Git because it is too complex. Only with
> a baptism of fire and likely a lot of hand holding do they warm up to Git.
> And - get this - once they learn the power of the DAG, of rebasing and
> interactive history editing, they fall in love. They think Mercurial's MQ
> model is primitive. Then I show them how to do "Git like" multiple head
> development in Mercurial and they're like "oh, this is basically the same
> except there aren't the complexities of understanding refs, the staging
> area, etc - this is quite nice."
>
> So, topics.
>
> I desperately want topics to be a compelling replacement for die hard MQ
> users. That means that topics needs to maintain the simplicity of MQ as much
> as possible. If people start with a single head repository, topics needs to
> be as simple to use as MQ is, even if multiple topics do introduce multiple
> heads in the DAG. Yet topics should also provide the features needed by
> advanced users - those who fully understand how the DAG works and how to
> manipulate changesets within it.
>
> For the beginning user, Mercurial/topics needs to provide:
>
> * A mechanism to show all active lines of work (list the topics) (and
> possibly how they relate to each other and the underlying repository)
> * A mechanism to show the current line of work (and possibly how it relates
> to the overall repository)
> * A mechanism to show/search for unfinished changesets
> * A mechanism to provide a human friendly name to a changeset
>
> I think the existing topics experiment shows a ton of promise in these
> areas. And as a bonus, the transition from a beginning topics user (1
> published head, 1 topic) to more advanced scenarios is much better than with
> MQ because you don't have to transition to different commands (away from the
> q* MQ commands): you simply add more advanced commands like "rebase" and
> "merge" to your skills set without having to relearn anything. I also love
> how topics "fade away" when changesets become published. That's such an
> improvement over having to manually delete things later. Just as long as the
> user knows when their topics disappear...
>
> There are still some areas for improving topics.
>
> 1) `hg log` still shows *all* changesets in the repository. This is

We should limit the output to a certain number, and it would be better if we
had hg log --all to get all changesets. The same applies to hg tags. I think
it's better to limit output to a certain number and have an --all flag, which
is quite common.

> confusing for users that don't want to be burdened with the complexity of
> multiple heads. I'd like the behavior of `hg log` to only show ancestors -
> and possibly descendants - of the working directory changeset when the
> working directory changeset is unpublished and has a topic. If we show
> descendants, we should delimit the current changeset somehow, possibly by
> making -G or a mode like it the default in this scenario. Realistically, I
> think `hg log` should behave like `hg log -f` and `hg log --children` should
> supplement that with descendants and a marking of the wdir changeset. If
> there is a branch point in the 

Re: [PATCH] py3: have an utility function to return string

2016-10-01 Thread Yuya Nishihara
On Sun, 2 Oct 2016 06:36:35 +0530, Pulkit Goyal wrote:
> Is encoding.encoding public or private? Can I convert it to unicode?

No. It's read/written freely. We could cache a unicode variant internally if
that matters, but we would need a setter function to invalidate the cache.

% grep encoding.encoding **/*.py
hgext/convert/convcmd.py:# tolocal() because the 
encoding.encoding convert()
hgext/convert/convcmd.py:orig_encoding = encoding.encoding
hgext/convert/convcmd.py:encoding.encoding = 'UTF-8'
hgext/convert/cvs.py:self.encoding = encoding.encoding
hgext/convert/gnuarch.py:self.encoding = encoding.encoding
hgext/highlight/__init__.py:mt = ''.join(tmpl('mimetype', 
encoding=encoding.encoding))
hgext/highlight/__init__.py:mt = ''.join(tmpl('mimetype', 
encoding=encoding.encoding))
hgext/highlight/highlight.py:text = text.decode(encoding.encoding, 
'replace')
hgext/highlight/highlight.py:coloriter = (s.encode(encoding.encoding, 
'replace')
hgext/win32mbcs.py:By default, win32mbcs uses encoding.encoding decided by 
Mercurial.
hgext/win32mbcs.py:_encoding = ui.config('win32mbcs', 'encoding', 
encoding.encoding)
hgext/zeroconf/__init__.py:return name.encode(encoding.encoding)
mercurial/commands.py:('', 'encoding', encoding.encoding, _('set the 
charset encoding'),
mercurial/commands.py:('', 'encodingmode', encoding.encodingmode,
mercurial/commands.py:fm.write('encoding', _("checking encoding 
(%s)...\n"), encoding.encoding)
mercurial/commandserver.py:self.cresult.write(encoding.encoding)
mercurial/commandserver.py:hellomsg += 'encoding: ' + encoding.encoding
mercurial/dispatch.py:reason = reason.encode(encoding.encoding, 
'replace')
mercurial/dispatch.py:encoding.encoding = options["encoding"]
mercurial/dispatch.py:encoding.encodingmode = options["encodingmode"]
mercurial/encoding.py:>>> encoding.encoding = 'utf-8'
mercurial/encoding.py:>>> t = u.encode(encoding.encoding)
mercurial/hgweb/hgweb_mod.py:'encoding': encoding.encoding,
mercurial/hgweb/hgweb_mod.py:encoding.encoding = rctx.config('web', 
'encoding', encoding.encoding)
mercurial/hgweb/hgweb_mod.py:ctype = tmpl('mimetype', 
encoding=encoding.encoding)
mercurial/hgweb/hgwebdir_mod.py:encoding.encoding = 
self.ui.config('web', 'encoding',
mercurial/hgweb/hgwebdir_mod.py:   
encoding.encoding)
mercurial/hgweb/hgwebdir_mod.py:ctype = tmpl('mimetype', 
encoding=encoding.encoding)
mercurial/hgweb/hgwebdir_mod.py:"encoding": encoding.encoding,
mercurial/hgweb/webcommands.py:mt += '; charset="%s"' % 
encoding.encoding
mercurial/i18n.py:_msgcache[message] = u.encode(encoding.encoding, 
"replace")
mercurial/mail.py: encoding.encoding.lower(), 'utf-8']
mercurial/mail.py:for ics in (encoding.encoding, 
encoding.fallbackencoding):
mercurial/mail.py:dom = dom.decode(encoding.encoding).encode('idna')
mercurial/minirst.py:>>> encoding.encoding = 'latin1'
mercurial/minirst.py:>>> encoding.encoding = 'shiftjis'
mercurial/minirst.py:utext = text.decode(encoding.encoding)
mercurial/minirst.py:return utext.encode(encoding.encoding)
mercurial/templatefilters.py:uctext = unicode(text[start:], 
encoding.encoding)
mercurial/templatefilters.py:yield 
(uctext[:w].encode(encoding.encoding),
mercurial/templatefilters.py:   
uctext[w:].encode(encoding.encoding))
mercurial/templatefilters.py:text = unicode(text, encoding.encoding, 
'replace')
mercurial/util.py:line = line.decode(encoding.encoding, 
encoding.encodingmode)
mercurial/util.py:initindent = initindent.decode(encoding.encoding, 
encoding.encodingmode)
mercurial/util.py:hangindent = hangindent.decode(encoding.encoding, 
encoding.encodingmode)
mercurial/util.py:return wrapper.fill(line).encode(encoding.encoding)
tests/test-context.py:encoding.encoding = enc
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


mercurial@30030: 2 new changesets (2 on stable)

2016-10-01 Thread Mercurial Commits
2 new changesets (2 on stable) in mercurial:

http://selenic.com/repo/hg//rev/269a5d121339
changeset:   30029:269a5d121339
branch:  stable
parent:  30010:149433e68974
user:Augie Fackler 
date:Sat Oct 01 15:00:18 2016 -0400
summary: Added tag 3.9.2 for changeset 149433e68974

http://selenic.com/repo/hg//rev/8d74027bd4e7
changeset:   30030:8d74027bd4e7
branch:  stable
tag: tip
user:Augie Fackler 
date:Sat Oct 01 15:00:23 2016 -0400
summary: Added signature for changeset 149433e68974

-- 
Repository URL: http://selenic.com/repo/hg/
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH] py3: have an utility function to return string

2016-10-01 Thread Pulkit Goyal
On Fri, Sep 16, 2016 at 7:16 PM, Yuya Nishihara  wrote:
> On Thu, 15 Sep 2016 23:59:59 +0530, Pulkit Goyal wrote:
>> On Thu, Sep 15, 2016 at 7:06 PM, Yuya Nishihara  wrote:
>> > On Wed, 14 Sep 2016 22:45:27 +0530, Pulkit Goyal wrote:
>> >> # HG changeset patch
>> >> # User Pulkit Goyal <7895pul...@gmail.com>
>> >> # Date 1473787789 -19800
>> >> #  Tue Sep 13 22:59:49 2016 +0530
>> >> # Node ID ec133d50af780e84a6a24825b52d433c10f9cd55
>> >> # Parent  85bd31515225e7fdf9bd88edde054db2c74a33f8
>> >> py3: have an utility function to return string
>> >>
>> >> There are cases when we need strings and can't use bytes in python 3.
>> >> We need an utility function for these cases. I agree that this may not
>> >> be the best possible way out. I will be happy if anybody else can suggest
>> >> a better approach. We need this functions for os.path.join(),
>> >
>> > We should stick to bytes for filesystem API, and translate bytes to unicode
>> > at VFS layer as necessary.
>> >
>> > https://www.mercurial-scm.org/wiki/WindowsUTF8Plan
>> >
>> > (Also, we'll have to disable PEP 528 and 529 on Python 3.6, which will 
>> > break
>> > existing repositories.)
>> >
>> > https://docs.python.org/3.6/whatsnew/3.6.html
>> >
>> >> __slots__
>> >
>> > __slots__ can be considered private data, so just use u''.
>> >
>> >> and few more things.
>> >
>> > for instance?
>> This function was motivated from Gregory's reply to
>> https://www.mercurial-scm.org/pipermail/mercurial-devel/2016-August/086704.html
>> , unfortunately I see that he replied to me only so I pasted it here
>> https://bpaste.net/show/ab0d3ea39749
>>
>> I am going through python documentation and there are things like
>> __slots__, is_frozen() which accepts str in both py2 and py3. Since
>> they are not same, I made this function to get help in such cases. If
>> we can use unicodes in __slots__ in py2, then that's good.
>
> Python 2.6-2.7 accepts both str and unicode in general, but mixing them is
> disaster so we've never used unicode whenever possible. Unfortunately, Python 
> 3
> solved that problem by forcing us to use unicode (named str) everywhere, which
> doesn't work in Mercurial because we need to process binary data (including
> unix paths) transparently. All inputs and outputs (except for future Windows
> file API) should be bytes.
>
> So, if is_frozen() of Py3 doesn't take bytes and Py2 doesn't take unicode,
> we'll need a compatibility function like you proposed.
>
>> >> +# This function converts its arguments to strings
>> >> +# on the basis of python version. Strings in python 3
>> >> +# are unicodes and our transformer converts everything to bytes
>> >> +# in python 3. So we need to decode it to unicodes in
>> >> +# py3.
>> >> +
>> >> +def coverttostr(word):
>> >> +if sys.version_info[0] < 3:
>> >> +assert isinstance(word, str), "Not a string in Python 2"
>> >> +return word
>> >> +# Checking word is bytes because we have the transformer, else
>> >> +# raising error
>> >> +assert isinstance(word, bytes), "Should be bytes because of 
>> >> transformer"
>> >> +return word.decode(sys.getfilesystemencoding())
>> >
>> > Can we assume 'word' was encoded in file-system codec?
>>
>> Yeah, because of the transformer, we added b'' everywhere.
>
> As Martijn said, that varies on how 'word' was encoded. Python sources would
> be latin1 or utf-8 in most cases, but a string read from external world is
> different. We assume it as encoding.encoding.

Is encoding.encoding public or private? Can I convert it to unicode?
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel