D2057: translate base85.c into rust code

2018-03-07 Thread Ivzhh (Sheng Mao)
Ivzhh updated this revision to Diff 6724.
Ivzhh added a comment.


  - merge with stable
  - translate base85.c into rust code
  - move hgbase85 into independent module
  - add hgstorage crate
  - hg status implementation in rust

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2057?vs=5238&id=6724

BRANCH
  phab-submit-D2057-2018-02-05 (bookmark) on default (branch)

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

AFFECTED FILES
  rust/Cargo.lock
  rust/Cargo.toml
  rust/hgbase85/Cargo.toml
  rust/hgbase85/build.rs
  rust/hgbase85/src/base85.rs
  rust/hgbase85/src/cpython_ext.rs
  rust/hgbase85/src/lib.rs
  rust/hgcli/Cargo.toml
  rust/hgcli/build.rs
  rust/hgcli/src/main.rs
  rust/hgstorage/Cargo.toml
  rust/hgstorage/src/changelog.rs
  rust/hgstorage/src/config.rs
  rust/hgstorage/src/dirstate.rs
  rust/hgstorage/src/lib.rs
  rust/hgstorage/src/local_repo.rs
  rust/hgstorage/src/manifest.rs
  rust/hgstorage/src/matcher.rs
  rust/hgstorage/src/mpatch.rs
  rust/hgstorage/src/path_encoding.rs
  rust/hgstorage/src/repository.rs
  rust/hgstorage/src/revlog.rs
  rust/hgstorage/src/revlog_v1.rs
  rust/hgstorage/src/working_context.rs

CHANGE DETAILS

diff --git a/rust/hgstorage/src/working_context.rs 
b/rust/hgstorage/src/working_context.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgstorage/src/working_context.rs
@@ -0,0 +1,108 @@
+use std::path::PathBuf;
+use std::io::prelude::*;
+use std::fs;
+use std::collections::HashMap;
+use std::collections::HashSet as Set;
+use std::sync::{Arc, Mutex, RwLock};
+
+use threadpool::ThreadPool;
+use num_cpus;
+
+use dirstate::{CurrentState, DirState};
+use local_repo::LocalRepo;
+use manifest::{FlatManifest, ManifestEntry};
+use changelog::ChangeLog;
+
+pub struct WorkCtx {
+pub dirstate: Arc,
+pub file_revs: HashMap,
+}
+
+impl WorkCtx {
+pub fn new(
+dot_hg_path: Arc,
+manifest: Arc,
+changelog: Arc,
+) -> Self {
+let dirstate = DirState::new(dot_hg_path.join("dirstate"));
+
+let manifest_id = changelog.get_commit_info();
+
+let rev = manifest
+.inner
+.read()
+.unwrap()
+.node_id_to_rev(_id.manifest_id)
+.unwrap();
+
+let file_revs = manifest.build_file_rev_mapping();
+
+let dirstate = Arc::new(RwLock::new(dirstate));
+
+Self {
+dirstate,
+file_revs,
+}
+}
+
+pub fn status(, repo: ) -> CurrentState {
+let mut state = self.dirstate
+.write()
+.unwrap()
+.walk_dir(repo.repo_root.as_path(), );
+
+if !state.lookup.is_empty() {
+let ncpus = num_cpus::get();
+
+let nworkers = if state.lookup.len() < ncpus {
+state.lookup.len()
+} else {
+ncpus
+};
+
+let pool = ThreadPool::new(nworkers);
+
+let clean = Arc::new(Mutex::new(Set::new()));
+let modified = Arc::new(Mutex::new(Set::new()));
+
+for f in state.lookup.drain() {
+let rl = repo.get_filelog(f.as_path());
+let fl = Arc::new(repo.repo_root.join(f.as_path()));
+
+let (id, p1, p2) = {
+let id = _revs[f.as_path()].id;
+let gd = rl.read().unwrap();
+let rev = gd.node_id_to_rev(id).unwrap();
+
+let p1 = gd.p1_nodeid();
+let p2 = gd.p2_nodeid();
+(id.clone(), p1, p2)
+};
+
+let clean = clean.clone();
+let modified = modified.clone();
+
+pool.execute(move || {
+let mut wfile = fs::File::open(fl.as_path()).unwrap();
+let mut content = Vecnew();
+wfile.read_to_end( content).unwrap();
+if rl.read().unwrap().check_hash(, , ) == id 
{
+clean.lock().unwrap().insert(f);
+} else {
+modified.lock().unwrap().insert(f);
+}
+});
+}
+
+pool.join();
+assert_eq!(pool.panic_count(), 0);
+
+let mut gd = modified.lock().unwrap();
+state.modified.extend(gd.drain());
+let mut gd = clean.lock().unwrap();
+state.clean.extend(gd.drain());
+}
+
+return state;
+}
+}
diff --git a/rust/hgstorage/src/revlog_v1.rs b/rust/hgstorage/src/revlog_v1.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgstorage/src/revlog_v1.rs
@@ -0,0 +1,422 @@
+use std::path::{Path, PathBuf};
+use std::io;
+use std::io::{BufReader, Read, Seek, SeekFrom};
+use std::fs;
+use std::cell::RefCell;
+use std::sync::{Arc, RwLock};
+use std::collections::HashMap as Map;
+
+use byteorder::{BigEndian, ReadBytesExt};
+use 

D2057: translate base85.c into rust code

2018-02-07 Thread Ivzhh (Sheng Mao)
Ivzhh added a comment.


  Thank you @indygreg!
  
  The OxidationPlan is my best reference when I started to make a move, and 
this thread is even more helpful. I am really interested in exploring this ;-) 
In 2014 I was trying to change the hg backend storage to Postgres, a silly and 
failed experiment.
  
  Anyway, I will save everyone's time and stop talking. I will come back later 
with a more meaningful implementation.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-07 Thread indygreg (Gregory Szorc)
indygreg added a comment.


  To be honest, we're not yet sure what we'll decide for the Python -> Rust 
bridge. The problem is summarized in the `Rust <=> Python Interop` section on 
https://www.mercurial-scm.org/wiki/OxidationPlan.
  
  I suspect at some level we'll need a CPython extension for CPython for 
performance reasons (especially for high volume function calls). PyPy obviously 
uses CFFI. I think the ideal outcome is we can write Rust that exposes a C API 
and use CFFI natively on PyPy and something like `cbindgen` + `Milksnake` to 
auto-generate a CPython extension that acts as a wrapper around the C API 
exposed by Rust. I'm not sure if anyone has invented this exact wheel yet. If 
not, it's probably faster to use `rust-cpython`. Maybe several months from now 
we have enough Rust and maintaining `rust-cpython` is painful enough that we 
pursue the auto-generated CPython extension route.
  
  What I'm trying to say is you have a green field to explore! But at this 
juncture, perfect is the enemy of done. We'll be happy with any forward 
progress, even failed experiments.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-07 Thread Ivzhh (Sheng Mao)
Ivzhh added a comment.


  As the author of this patch, actually I have the same concern. I started to 
translate base85 as a baby step to find a way of integrating rust and cpython. 
On my side, today I modified setup.py, policy.py and the makefile to run hg's test 
suite with the new base85. For me, it is only a proof of concept.
  
  Maybe I should take another way: translate more python modules into 
CFFI-style, and let CFFI call rust implementation. And gradually change more 
implementations of python modules with corresponding cffi-style, while keeping the 
python interface the same. My own hope is the rust routines will be able to 
call each other and eventually run some __basic__ tasks without calling python 
part. And the rust still lazily provides info to python interface for 
extensions etc.
  
  I am exploring this way now, and hope the findings will be useful for 
community to make decision.
  
  Thank you all for the comments!

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-07 Thread krbullock (Kevin Bullock)
krbullock added a comment.


  What would be the advantage of taking this? Since we already have the C 
implementation, it's not likely to gain us any performance. On the other hand, 
it might make a good test case for integrating Rust and Python, finding the 
right API boundaries and experimenting with different approaches, precisely 
//because// we already have a C implementation. @indygreg @durin42 what are 
your thoughts about it?

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-07 Thread kevincox (Kevin Cox)
kevincox added a comment.


  I agree with the splitting comments :) In fact there might already be a 
base85 crate which can be used: https://docs.rs/zero85. Either way I'll hold 
off on the review, feel free to ping me when you are ready for me to take a 
look.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-06 Thread Ivzhh (Sheng Mao)
Ivzhh added a comment.


  Thank you @indygreg for your detailed explanation!
  
  I understand the process now, and I will go back reading the developer's 
guide thoroughly again. I will try my best to provide a relatively clean stack 
of patches.
  
  Thank you for your time!

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-06 Thread indygreg (Gregory Szorc)
indygreg added a comment.


  We generally prefer that patches to Mercurial be small and do a single thing. 
This makes it easier to review and understand changes, since each change can be 
evaluated in isolation. If you submit changesets together using `hg phabsend`, 
they automatically show up as a //stack// in Phabricator. And if changesets at 
the bottom of the stack are ready to land, we generally land those without 
waiting for the entire stack to land. This enables forward progress to be made 
and this is generally better for everyone than waiting until a series of 
commits is perfect before adding any of them.
  
  What that means is you should ideally split this work into smaller parts. For 
example:
  
  1. Add the pure Rust code/crate
  2. Add the Python Rust code/crate
  3. Build system / module policy changes
  
  I'm not sure of the order of things though. Since this is the first Rust 
extension, it's not clear what needs to be implemented in what order. I'm fine 
looking at a large commit if things are too tightly coupled to separate. But 
you should strive to make smaller commits.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-06 Thread Ivzhh (Sheng Mao)
Ivzhh added a comment.


  Sure, thank you for the comments! I can definitely prepare makefile and 
setup.py to make the building process work with rust part. I am planning to 
change the policy.py module to support and try to load rust modules and run all 
the tests. I will submit a new patch after finishing these two tasks.
  
  After reading wiki/OxidationPlan again, I plan to change to cffi for better 
compatibility (pypy and others), and try to build algorithms in pure rust. 
Shall I wait till migrating to cffi based solution now and resubmit this patch 
with all three changes (building, testing, and cffi)?
  
  Thank you!

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-06 Thread indygreg (Gregory Szorc)
indygreg added a comment.


  Yes, we should definitely split things into multiple crates. Small, 
narrowly-focused crates do seem to be the Rust way, after all.
  
  `hgcli` should be for things specific to the Rust implementation of `hg`. I 
think this can also include the feature set of `chg` (once we've ported `chg` 
to Rust).
  
  I definitely support separating the "pure Rust" from the "Python Rust" via a 
crate boundary. It is generally useful to have Rust that isn't bound to Python 
because it will facilitate reuse outside of Python contexts. For example, 
someone could implement a Mercurial wire protocol server in pure Rust without 
needing to worry about Python. Of course, we're likely to encounter areas where 
we really want tight coupling in order to achieve optimal performance in 
Python. So we may have to design APIs on the pure Rust side to facilitate 
CPython use. I'm OK with that.
  
  As for how many crates to have, I don't have super strong opinions. I could 
see us putting every little component/subsystem in its own crate. I could also 
see us putting everything in one large crate. I don't think it is worth 
deciding at this early juncture. API design and ability to be reused outside 
its originally intended purpose is the important property to strive for. I 
think that has more to do with how the code is authored rather than which 
crates things are in.
  
  A missing piece of this patch is the build system and module loader 
integration. We have a //module policy// that dictates which implementation of 
a Python module we use. We probably want to introduce a `rust` policy that uses 
Rust-based modules where available and falls back to the `cext` modules/policy 
if a Rust module isn't available. We also need to figure out how to integrate 
Rust into `setup.py`. But I think the build system bit can be deferred until 
we're actually ready to ship Rust, which is still a bit of ways off. I'm happy 
for the workflow to be //run cargo in order to load Rust modules// for the time 
being. But if you can implement `Makefile` and/or `setup.py` integration to 
build these Rust extensions, that would be awesome.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-06 Thread Ivzhh (Sheng Mao)
Ivzhh added a comment.


  I am open to the three-crates plan. Originally I have hgcli and hgext 
separately, and I was planning to replace CFFI. I am a pypy user too, so I will 
be willing to provide a python C API free crate for pypy and others.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-06 Thread durin42 (Augie Fackler)
durin42 added a subscriber: indygreg.
durin42 added a comment.


  I'd be curious to see what @indygreg has to say about this, maybe wait on his 
input before doing any work in response to my feedback?
  
  I do wonder if we should have at least three crates:
  
  1. hgcli
  2. libmercurial
  3. hgcext
  
  The first one would be the command-line entry point, the last could use the 
cpython API, and libmercurial would be "pure rust" and open the door to 
eventually having a libhg or something that exports C functions and would be 
suitable for cffi and linking into other binaries?

INLINE COMMENTS

> base85.rs:22
> +
> +pub fn b85encode(py: Python, text: , pad: i32) -> PyResult {
> +let text = text.as_bytes();

I think I'd like to separate things a bit more and have a Python-free module, 
and then a glue module that we can use to call into the pure Rust. Part of the 
reason is that in my perfect world we won't use the cpython crate for speedups 
so they can be used from pypy as well. Separating them at least makes it easier 
to have an extern "C" version of the method that can be used from cffi instead 
of only through the CPython API.

(Not sure what opinions others have. It's likely that I'll attempt this 
approach in the near future as part of a continued attempt to speed up `hg 
diff`.)

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers
Cc: indygreg, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: translate base85.c into rust code

2018-02-05 Thread Ivzhh (Sheng Mao)
Ivzhh created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  - python extension to encode/decode base85
  - add test suites to call encode/decode base85 in rust-/python- convention
  - add proper python environmental setup for developers with multiple python
    environments (e.g. conda 2/3 for data processing etc.). The environment
    version is more controllable.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

AFFECTED FILES
  rust/hgcli/src/hgext/base85.rs
  rust/hgcli/src/hgext/cpython_ext.rs
  rust/hgcli/src/hgext/mod.rs
  rust/hgcli/src/main.rs

CHANGE DETAILS

diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs
--- a/rust/hgcli/src/main.rs
+++ b/rust/hgcli/src/main.rs
@@ -6,9 +6,11 @@
 // GNU General Public License version 2 or any later version.
 
 extern crate libc;
-extern crate cpython;
+#[macro_use] extern crate cpython;
 extern crate python27_sys;
 
+pub mod hgext;
+
 use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python};
 use libc::{c_char, c_int};
 
diff --git a/rust/hgcli/src/hgext/mod.rs b/rust/hgcli/src/hgext/mod.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/hgext/mod.rs
@@ -0,0 +1,129 @@
+extern crate libc;
+
+pub mod base85;
+pub mod cpython_ext;
+
+use std;
+use std::{env, sync};
+use std::path::{PathBuf};
+use std::ffi::{CString, OsStr};
+use python27_sys as ffi;
+use cpython;
+
+#[cfg(target_family = "unix")]
+use std::os::unix::ffi::{OsStrExt};
+
+static HG_EXT_REG: sync::Once = sync::ONCE_INIT;
+
+#[no_mangle]
+pub fn init_all_hg_ext(_py: cpython::Python) {
+HG_EXT_REG.call_once(|| {
+unsafe {
+base85::initoxidized_base85();
+}
+});
+}
+
+#[derive(Debug)]
+pub struct Environment {
+_exe: PathBuf,
+python_exe: PathBuf,
+python_home: PathBuf,
+mercurial_modules: PathBuf,
+}
+
+// On UNIX, platform string is just bytes and should not contain NUL.
+#[cfg(target_family = "unix")]
+fn cstring_from_os>(s: T) -> CString {
+CString::new(s.as_ref().as_bytes()).unwrap()
+}
+
+#[cfg(target_family = "windows")]
+fn cstring_from_os>(s: T) -> CString {
+CString::new(s.as_ref().to_str().unwrap()).unwrap()
+}
+
+fn set_python_home(env: ) {
+let raw = cstring_from_os(_home).into_raw();
+unsafe {
+ffi::Py_SetPythonHome(raw);
+}
+}
+
+static PYTHON_ENV_START: sync::Once = sync::ONCE_INIT;
+
+/// the second half initialization code are copied from rust-cpython
+/// fn pythonrun::prepare_freethreaded_python()
+/// because this function is called mainly by `cargo test`
+/// and the multi-thread nature requires to properly
+/// set up threads and GIL. In the corresponding version,
+/// prepare_freethreaded_python() is turned off, so the cargo
+/// test features must be properly called.
+pub fn set_py_env() {
+PYTHON_ENV_START.call_once(|| {
+let env = {
+let exe = env::current_exe().unwrap();
+
+let mercurial_modules = std::env::var("HGROOT").expect("must set 
mercurial's root folder (one layer above mercurial folder itself");
+
+let python_exe = std::env::var("HGRUST_PYTHONEXE").expect("set 
PYTHONEXE to the full path of the python.exe file");
+
+let python_home = std::env::var("HGRUST_PYTHONHOME").expect("if 
you don't want to use system one, set PYTHONHOME according to python doc");
+
+Environment {
+_exe: exe.clone(),
+python_exe: PathBuf::from(python_exe),
+python_home: PathBuf::from(python_home),
+mercurial_modules: PathBuf::from(mercurial_modules),
+}
+};
+
+//println!("{:?}", env);
+
+// Tell Python where it is installed.
+set_python_home();
+
+// Set program name. The backing memory needs to live for the duration 
of the
+// interpreter.
+//
+// TODO consider storing this in a static or associating with lifetime 
of
+// the Python interpreter.
+//
+// Yes, we use the path to the Python interpreter not argv[0] here. The
+// reason is because Python uses the given path to find the location of
+// Python files. Apparently we could define our own ``Py_GetPath()``
+// implementation. But this may require statically linking Python, 
which is
+// not desirable.
+let program_name = cstring_from_os(_exe).as_ptr();
+unsafe {
+ffi::Py_SetProgramName(program_name as *mut i8);
+}
+
+unsafe {
+//ffi::Py_Initialize();
+
+if ffi::Py_IsInitialized() != 0 {
+// If Python is already initialized, we expect Python 
threading to also be initialized,
+// as we can't make the existing Python main thread acquire 
the GIL.
+assert!(ffi::PyEval_ThreadsInitialized() != 0);
+} else {
+//