D2057: translate base85.c into rust code
Ivzhh updated this revision to Diff 6724. Ivzhh added a comment. - merge with stable - translate base85.c into rust code - move hgbase85 into independent module - add hgstorage crate - hg status implementation in rust REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D2057?vs=5238=6724 BRANCH phab-submit-D2057-2018-02-05 (bookmark) on default (branch) REVISION DETAIL https://phab.mercurial-scm.org/D2057 AFFECTED FILES rust/Cargo.lock rust/Cargo.toml rust/hgbase85/Cargo.toml rust/hgbase85/build.rs rust/hgbase85/src/base85.rs rust/hgbase85/src/cpython_ext.rs rust/hgbase85/src/lib.rs rust/hgcli/Cargo.toml rust/hgcli/build.rs rust/hgcli/src/main.rs rust/hgstorage/Cargo.toml rust/hgstorage/src/changelog.rs rust/hgstorage/src/config.rs rust/hgstorage/src/dirstate.rs rust/hgstorage/src/lib.rs rust/hgstorage/src/local_repo.rs rust/hgstorage/src/manifest.rs rust/hgstorage/src/matcher.rs rust/hgstorage/src/mpatch.rs rust/hgstorage/src/path_encoding.rs rust/hgstorage/src/repository.rs rust/hgstorage/src/revlog.rs rust/hgstorage/src/revlog_v1.rs rust/hgstorage/src/working_context.rs CHANGE DETAILS diff --git a/rust/hgstorage/src/working_context.rs b/rust/hgstorage/src/working_context.rs new file mode 100644 --- /dev/null +++ b/rust/hgstorage/src/working_context.rs @@ -0,0 +1,108 @@ +use std::path::PathBuf; +use std::io::prelude::*; +use std::fs; +use std::collections::HashMap; +use std::collections::HashSet as Set; +use std::sync::{Arc, Mutex, RwLock}; + +use threadpool::ThreadPool; +use num_cpus; + +use dirstate::{CurrentState, DirState}; +use local_repo::LocalRepo; +use manifest::{FlatManifest, ManifestEntry}; +use changelog::ChangeLog; + +pub struct WorkCtx { +pub dirstate: Arc, +pub file_revs: HashMap , +} + +impl WorkCtx { +pub fn new( +dot_hg_path: Arc, +manifest: Arc, +changelog: Arc, +) -> Self { +let dirstate = DirState::new(dot_hg_path.join("dirstate")); + +let manifest_id = changelog.get_commit_info(); + +let rev = manifest +.inner 
+.read() +.unwrap() +.node_id_to_rev(_id.manifest_id) +.unwrap(); + +let file_revs = manifest.build_file_rev_mapping(); + +let dirstate = Arc::new(RwLock::new(dirstate)); + +Self { +dirstate, +file_revs, +} +} + +pub fn status(, repo: ) -> CurrentState { +let mut state = self.dirstate +.write() +.unwrap() +.walk_dir(repo.repo_root.as_path(), ); + +if !state.lookup.is_empty() { +let ncpus = num_cpus::get(); + +let nworkers = if state.lookup.len() < ncpus { +state.lookup.len() +} else { +ncpus +}; + +let pool = ThreadPool::new(nworkers); + +let clean = Arc::new(Mutex::new(Set::new())); +let modified = Arc::new(Mutex::new(Set::new())); + +for f in state.lookup.drain() { +let rl = repo.get_filelog(f.as_path()); +let fl = Arc::new(repo.repo_root.join(f.as_path())); + +let (id, p1, p2) = { +let id = _revs[f.as_path()].id; +let gd = rl.read().unwrap(); +let rev = gd.node_id_to_rev(id).unwrap(); + +let p1 = gd.p1_nodeid(); +let p2 = gd.p2_nodeid(); +(id.clone(), p1, p2) +}; + +let clean = clean.clone(); +let modified = modified.clone(); + +pool.execute(move || { +let mut wfile = fs::File::open(fl.as_path()).unwrap(); +let mut content = Vecnew(); +wfile.read_to_end( content).unwrap(); +if rl.read().unwrap().check_hash(, , ) == id { +clean.lock().unwrap().insert(f); +} else { +modified.lock().unwrap().insert(f); +} +}); +} + +pool.join(); +assert_eq!(pool.panic_count(), 0); + +let mut gd = modified.lock().unwrap(); +state.modified.extend(gd.drain()); +let mut gd = clean.lock().unwrap(); +state.clean.extend(gd.drain()); +} + +return state; +} +} diff --git a/rust/hgstorage/src/revlog_v1.rs b/rust/hgstorage/src/revlog_v1.rs new file mode 100644 --- /dev/null +++ b/rust/hgstorage/src/revlog_v1.rs @@ -0,0 +1,422 @@ +use std::path::{Path, PathBuf}; +use std::io; +use std::io::{BufReader, Read, Seek, SeekFrom}; +use std::fs; +use std::cell::RefCell; +use std::sync::{Arc, RwLock}; +use std::collections::HashMap as Map; + +use byteorder::{BigEndian, ReadBytesExt}; +use
D2057: translate base85.c into rust code
Ivzhh added a comment. Thank you @indygreg! The OxidationPlan is my best reference when I started to make a move, and this thread is even more helpful. I am really interested in exploring this ;-) In 2014 I was trying to change the hg backend storage to Postgres, a silly and failed experiment. Anyway, I will save everyone's time and stop talking. I will come back later with a more meaningful implementation. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
indygreg added a comment. To be honest, we're not yet sure what we'll decide for the Python -> Rust bridge. The problem is summarized in the `Rust <=> Python Interop` section on https://www.mercurial-scm.org/wiki/OxidationPlan. I suspect at some level we'll need a CPython extension for CPython for performance reasons (especially for high volume function calls). PyPy obviously uses CFFI. I think the ideal outcome is we can write Rust that exposes a C API and use CFFI natively on PyPy and something like `cbindgen` + `Milksnake` to auto-generate a CPython extension that acts as a wrapper around the C API exposed by Rust. I'm not sure if anyone has invented this exact wheel yet. If not, it's probably faster to use `rust-cpython`. Maybe several months from now we have enough Rust and maintaining `rust-cpython` is painful enough that we pursue the auto-generated CPython extension route. What I'm trying to say is you have a green field to explore! But at this juncture, perfect is the enemy of done. We'll be happy with any forward progress, even failed experiments. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
Ivzhh added a comment. As the author of this patch, I actually have the same concern. I started translating base85 as a baby step toward finding a way of integrating Rust and CPython. On my side, today I modified setup.py, policy.py and the makefile to run hg's test suite with the new base85. For myself, it is only a proof of concept. Maybe I should take another approach: translate more Python modules into CFFI style, and let CFFI call the Rust implementation, then gradually replace more implementations of Python modules with corresponding CFFI-style ones, while keeping the Python interface the same. My own hope is that the Rust routines will be able to call each other and eventually run some __basic__ tasks without calling the Python part, with Rust still lazily providing info to the Python interface for extensions etc. I am exploring this approach now, and hope the findings will be useful for the community in making a decision. Thank you all for the comments! REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
krbullock added a comment. What would be the advantage of taking this? Since we already have the C implementation, it's not likely to gain us any performance. On the other hand, it might make a good test case for integrating Rust and Python, finding the right API boundaries and experimenting with different approaches, precisely //because// we already have a C implementation. @indygreg @durin42 what are your thoughts about it? REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: krbullock, indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
kevincox added a comment. I agree with the splitting comments :) In fact there might already be a base85 crate which can be used: https://docs.rs/zero85. Either way I'll hold off on the review, feel free to ping me when you are ready for me to take a look. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
Ivzhh added a comment. Thank you @indygreg for your detailed explanation! I understand the process now, and I will go back and read the developer's guide thoroughly again. I will try my best to provide a relatively clean stack of patches. Thank you for your time! REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
indygreg added a comment. We generally prefer that patches to Mercurial be small and do a single thing. This makes it easier to review and understand changes, since each change can be evaluated in isolation. If you submit changesets together using `hg phabsend`, they automatically show up as a //stack// in Phabricator. And if changesets at the bottom of the stack are ready to land, we generally land those without waiting for the entire stack to land. This enables forward progress to be made and this is generally better for everyone than waiting until a series of commits is perfect before adding any of them. What that means is you should ideally split this work into smaller parts. For example: 1. Add the pure Rust code/crate 2. Add the Python Rust code/crate 3. Build system / module policy changes I'm not sure of the order of things though. Since this is the first Rust extension, it's not clear what needs to be implemented in what order. I'm fine looking at a large commit if things are too tightly coupled to separate. But you should strive to make smaller commits. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
Ivzhh added a comment. Sure, thank you for the comments! I can definitely prepare the makefile and setup.py to make the build process work with the Rust part. I am planning to change the policy.py module so that it tries to load Rust modules, and then run all the tests. I will submit a new patch after finishing these two tasks. After reading wiki/OxidationPlan again, I plan to change to cffi for better compatibility (pypy and others), and try to build algorithms in pure Rust. Shall I wait until I have migrated to a cffi-based solution and then resubmit this patch with all three changes (building, testing, and cffi)? Thank you! REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
indygreg added a comment. Yes, we should definitely split things into multiple crates. Small, narrowly-focused crates do seem to be the Rust way, after all. `hgcli` should be for things specific to the Rust implementation of `hg`. I think this can also include the feature set of `chg` (once we've ported `chg` to Rust). I definitely support separating the "pure Rust" from the "Python Rust" via a crate boundary. It is generally useful to have Rust that isn't bound to Python because it will facilitate reuse outside of Python contexts. For example, someone could implement a Mercurial wire protocol server in pure Rust without needing to worry about Python. Of course, we're likely to encounter areas where we really want tight coupling in order to achieve optimal performance in Python. So we may have to design APIs on the pure Rust side to facilitate CPython use. I'm OK with that. As for how many crates to have, I don't have super strong opinions. I could see us putting every little component/subsystem in its own crate. I could also see us putting everything in one large crate. I don't think it is worth deciding at this early juncture. API design and ability to be reused outside its originally intended purpose is the important property to strive for. I think that has more to do with how the code is authored rather than which crates things are in. A missing piece of this patch is the build system and module loader integration. We have a //module policy// that dictates which implementation of a Python module we use. We probably want to introduce a `rust` policy that uses Rust-based modules where available and falls back to the `cext` modules/policy if a Rust module isn't available. We also need to figure out how to integrate Rust into `setup.py`. But I think the build system bit can be deferred until we're actually ready to ship Rust, which is still a ways off. I'm happy for the workflow to be //run cargo in order to load Rust modules// for the time being. 
But if you can implement `Makefile` and/or `setup.py` integration to build these Rust extensions, that would be awesome. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
Ivzhh added a comment. I am open to the three-crates plan. Originally I had hgcli and hgext separate, and I was planning to replace CFFI. I am a pypy user too, so I will be willing to provide a crate that is free of the Python C API, for pypy and others. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
durin42 added a subscriber: indygreg. durin42 added a comment. I'd be curious to see what @indygreg has to say about this, maybe wait on his input before doing any work in response to my feedback? I do wonder if we should have at least three crates: 1. hgcli 2. libmercurial 3. hgcext The first one would be the command-line entry point, the last could use the cpython API, and libmercurial would be "pure rust" and open the door to eventually having a libhg or something that exports C functions and would be suitable for cffi and linking into other binaries? INLINE COMMENTS > base85.rs:22 > + > +pub fn b85encode(py: Python, text: , pad: i32) -> PyResult { > +let text = text.as_bytes(); I think I'd like to separate things a bit more and have a Python-free module, and then a glue module that we can use to call into the pure Rust. Part of the reason is that in my perfect world we won't use the cpython crate for speedups so they can be used from pypy as well. Separating them at least makes it easier to have an extern "C" version of the method that can be used from cffi instead of only through the CPython API. (Not sure what opinions others have. It's likely that I'll attempt this approach in the near future as part of a continued attempt to speed up `hg diff`.) REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 To: Ivzhh, #hg-reviewers Cc: indygreg, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D2057: translate base85.c into rust code
Ivzhh created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY - python extension to encode/decode base85 - add test suits to call encode/decode base85 in rust-/python- convention - add proper python environmental setup for developer with multiple python environment (e.g. conda 2/3 for data processing etc.). Environmental version is more controllable. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2057 AFFECTED FILES rust/hgcli/src/hgext/base85.rs rust/hgcli/src/hgext/cpython_ext.rs rust/hgcli/src/hgext/mod.rs rust/hgcli/src/main.rs CHANGE DETAILS diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs --- a/rust/hgcli/src/main.rs +++ b/rust/hgcli/src/main.rs @@ -6,9 +6,11 @@ // GNU General Public License version 2 or any later version. extern crate libc; -extern crate cpython; +#[macro_use] extern crate cpython; extern crate python27_sys; +pub mod hgext; + use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python}; use libc::{c_char, c_int}; diff --git a/rust/hgcli/src/hgext/mod.rs b/rust/hgcli/src/hgext/mod.rs new file mode 100644 --- /dev/null +++ b/rust/hgcli/src/hgext/mod.rs @@ -0,0 +1,129 @@ +extern crate libc; + +pub mod base85; +pub mod cpython_ext; + +use std; +use std::{env, sync}; +use std::path::{PathBuf}; +use std::ffi::{CString, OsStr}; +use python27_sys as ffi; +use cpython; + +#[cfg(target_family = "unix")] +use std::os::unix::ffi::{OsStrExt}; + +static HG_EXT_REG: sync::Once = sync::ONCE_INIT; + +#[no_mangle] +pub fn init_all_hg_ext(_py: cpython::Python) { +HG_EXT_REG.call_once(|| { +unsafe { +base85::initoxidized_base85(); +} +}); +} + +#[derive(Debug)] +pub struct Environment { +_exe: PathBuf, +python_exe: PathBuf, +python_home: PathBuf, +mercurial_modules: PathBuf, +} + +// On UNIX, platform string is just bytes and should not contain NUL. 
+#[cfg(target_family = "unix")] +fn cstring_from_os>(s: T) -> CString { +CString::new(s.as_ref().as_bytes()).unwrap() +} + +#[cfg(target_family = "windows")] +fn cstring_from_os>(s: T) -> CString { +CString::new(s.as_ref().to_str().unwrap()).unwrap() +} + +fn set_python_home(env: ) { +let raw = cstring_from_os(_home).into_raw(); +unsafe { +ffi::Py_SetPythonHome(raw); +} +} + +static PYTHON_ENV_START: sync::Once = sync::ONCE_INIT; + +/// the second half initialization code are copied from rust-cpython +/// fn pythonrun::prepare_freethreaded_python() +/// because this function is called mainly by `cargo test` +/// and the multi-thread nature requires to properly +/// set up threads and GIL. In the corresponding version, +/// prepare_freethreaded_python() is turned off, so the cargo +/// test features must be properly called. +pub fn set_py_env() { +PYTHON_ENV_START.call_once(|| { +let env = { +let exe = env::current_exe().unwrap(); + +let mercurial_modules = std::env::var("HGROOT").expect("must set mercurial's root folder (one layer above mercurial folder itself"); + +let python_exe = std::env::var("HGRUST_PYTHONEXE").expect("set PYTHONEXE to the full path of the python.exe file"); + +let python_home = std::env::var("HGRUST_PYTHONHOME").expect("if you don't want to use system one, set PYTHONHOME according to python doc"); + +Environment { +_exe: exe.clone(), +python_exe: PathBuf::from(python_exe), +python_home: PathBuf::from(python_home), +mercurial_modules: PathBuf::from(mercurial_modules), +} +}; + +//println!("{:?}", env); + +// Tell Python where it is installed. +set_python_home(); + +// Set program name. The backing memory needs to live for the duration of the +// interpreter. +// +// TODO consider storing this in a static or associating with lifetime of +// the Python interpreter. +// +// Yes, we use the path to the Python interpreter not argv[0] here. The +// reason is because Python uses the given path to find the location of +// Python files. 
Apparently we could define our own ``Py_GetPath()`` +// implementation. But this may require statically linking Python, which is +// not desirable. +let program_name = cstring_from_os(_exe).as_ptr(); +unsafe { +ffi::Py_SetProgramName(program_name as *mut i8); +} + +unsafe { +//ffi::Py_Initialize(); + +if ffi::Py_IsInitialized() != 0 { +// If Python is already initialized, we expect Python threading to also be initialized, +// as we can't make the existing Python main thread acquire the GIL. +assert!(ffi::PyEval_ThreadsInitialized() != 0); +} else { +//