Ivzhh created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  - Python extension to encode/decode base85
  - add test suites that call the base85 encoder/decoder through both the Rust
    and the Python calling conventions
  - add proper Python environment setup for developers who have multiple Python
    environments installed (e.g. conda 2/3 for data processing etc.), so that
    the Python version in use is more controllable.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

AFFECTED FILES
  rust/hgcli/src/hgext/base85.rs
  rust/hgcli/src/hgext/cpython_ext.rs
  rust/hgcli/src/hgext/mod.rs
  rust/hgcli/src/main.rs

CHANGE DETAILS

diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs
--- a/rust/hgcli/src/main.rs
+++ b/rust/hgcli/src/main.rs
@@ -6,9 +6,11 @@
 // GNU General Public License version 2 or any later version.
 
 extern crate libc;
-extern crate cpython;
+#[macro_use] extern crate cpython;
 extern crate python27_sys;
 
+pub mod hgext;
+
 use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python};
 use libc::{c_char, c_int};
 
diff --git a/rust/hgcli/src/hgext/mod.rs b/rust/hgcli/src/hgext/mod.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/hgext/mod.rs
@@ -0,0 +1,129 @@
+extern crate libc;
+
+pub mod base85;
+pub mod cpython_ext;
+
+use std;
+use std::{env, sync};
+use std::path::{PathBuf};
+use std::ffi::{CString, OsStr};
+use python27_sys as ffi;
+use cpython;
+
+#[cfg(target_family = "unix")]
+use std::os::unix::ffi::{OsStrExt};
+
+static HG_EXT_REG: sync::Once = sync::ONCE_INIT;
+
+/// Registers every Rust-implemented extension module with the Python
+/// interpreter.
+///
+/// Registration is guarded by `HG_EXT_REG`, so calling this function several
+/// times (for example once per test) runs the module initializers only once.
+/// The `_py` argument is unused by the body; presumably it is taken so that
+/// callers must already hold the GIL - TODO confirm.
+#[no_mangle]
+pub fn init_all_hg_ext(_py: cpython::Python) {
+    // `initoxidized_base85` is the init entry point declared through
+    // `py_module_initializer!` in the base85 module.
+    HG_EXT_REG.call_once(|| unsafe { base85::initoxidized_base85(); });
+}
+
+/// Paths describing the Python/Mercurial installation used when the
+/// interpreter is set up by `set_py_env()`.
+#[derive(Debug)]
+pub struct Environment {
+    // Path of the currently running executable (from `env::current_exe()`);
+    // stored but not read anywhere in this module.
+    _exe: PathBuf,
+    // Path of the Python executable; passed to `Py_SetProgramName`.
+    python_exe: PathBuf,
+    // Python home directory; passed to `Py_SetPythonHome`.
+    python_home: PathBuf,
+    // Value of `HGROOT`; stored but not read anywhere in this module.
+    mercurial_modules: PathBuf,
+}
+
+// On UNIX an OS string is just a sequence of bytes, so the raw bytes are
+// handed to `CString` unchanged. Panics if the value contains a NUL byte.
+#[cfg(target_family = "unix")]
+fn cstring_from_os<T: AsRef<OsStr>>(s: T) -> CString {
+    let raw_bytes = s.as_ref().as_bytes();
+    CString::new(raw_bytes).unwrap()
+}
+
+// On Windows the value is first converted to `&str`. Panics if it is not
+// valid Unicode or if it contains a NUL byte.
+#[cfg(target_family = "windows")]
+fn cstring_from_os<T: AsRef<OsStr>>(s: T) -> CString {
+    let as_utf8 = s.as_ref().to_str().unwrap();
+    CString::new(as_utf8).unwrap()
+}
+
+/// Tells the interpreter where Python is installed (`env.python_home`).
+///
+/// The C string is released with `into_raw()` and never reclaimed, so the
+/// pointer handed to `Py_SetPythonHome` is never freed by this code.
+fn set_python_home(env: &Environment) {
+    let home = cstring_from_os(&env.python_home);
+    unsafe {
+        ffi::Py_SetPythonHome(home.into_raw());
+    }
+}
+
+static PYTHON_ENV_START: sync::Once = sync::ONCE_INIT;
+
+/// the second half initialization code are copied from rust-cpython
+/// fn pythonrun::prepare_freethreaded_python()
+/// because this function is called mainly by `cargo test`
+/// and the multi-thread nature requires to properly
+/// set up threads and GIL. In the corresponding version,
+/// prepare_freethreaded_python() is turned off, so the cargo
+/// test features must be properly called.
+pub fn set_py_env() {
+    PYTHON_ENV_START.call_once(|| {
+        let env = {
+            let exe = env::current_exe().unwrap();
+
+            let mercurial_modules = std::env::var("HGROOT").expect("must set 
mercurial's root folder (one layer above mercurial folder itself");
+
+            let python_exe = std::env::var("HGRUST_PYTHONEXE").expect("set 
PYTHONEXE to the full path of the python.exe file");
+
+            let python_home = std::env::var("HGRUST_PYTHONHOME").expect("if 
you don't want to use system one, set PYTHONHOME according to python doc");
+
+            Environment {
+                _exe: exe.clone(),
+                python_exe: PathBuf::from(python_exe),
+                python_home: PathBuf::from(python_home),
+                mercurial_modules: PathBuf::from(mercurial_modules),
+            }
+        };
+
+        //println!("{:?}", env);
+
+        // Tell Python where it is installed.
+        set_python_home(&env);
+
+        // Set program name. The backing memory needs to live for the duration 
of the
+        // interpreter.
+        //
+        // TODO consider storing this in a static or associating with lifetime 
of
+        // the Python interpreter.
+        //
+        // Yes, we use the path to the Python interpreter not argv[0] here. The
+        // reason is because Python uses the given path to find the location of
+        // Python files. Apparently we could define our own ``Py_GetPath()``
+        // implementation. But this may require statically linking Python, 
which is
+        // not desirable.
+        let program_name = cstring_from_os(&env.python_exe).as_ptr();
+        unsafe {
+            ffi::Py_SetProgramName(program_name as *mut i8);
+        }
+
+        unsafe {
+            //ffi::Py_Initialize();
+
+            if ffi::Py_IsInitialized() != 0 {
+                // If Python is already initialized, we expect Python 
threading to also be initialized,
+                // as we can't make the existing Python main thread acquire 
the GIL.
+                assert!(ffi::PyEval_ThreadsInitialized() != 0);
+            } else {
+                // If Python isn't initialized yet, we expect that Python 
threading isn't initialized either.
+                assert!(ffi::PyEval_ThreadsInitialized() == 0);
+                // Initialize Python.
+                // We use Py_InitializeEx() with initsigs=0 to disable Python 
signal handling.
+                // Signal handling depends on the notion of a 'main thread', 
which doesn't exist in this case.
+                // Note that the 'main thread' notion in Python isn't 
documented properly;
+                // and running Python without one is not officially supported.
+                ffi::Py_InitializeEx(0);
+                ffi::PyEval_InitThreads();
+                // PyEval_InitThreads() will acquire the GIL,
+                // but we don't want to hold it at this point
+                // (it's not acquired in the other code paths)
+                // So immediately release the GIL:
+                let _thread_state = ffi::PyEval_SaveThread();
+                // Note that the PyThreadState returned by PyEval_SaveThread 
is also held in TLS by the Python runtime,
+                // and will be restored by PyGILState_Ensure.
+            }
+        }
+    });
+}
diff --git a/rust/hgcli/src/hgext/cpython_ext.rs 
b/rust/hgcli/src/hgext/cpython_ext.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/hgext/cpython_ext.rs
@@ -0,0 +1,25 @@
+use cpython::{PyObject, Python, Py_ssize_t, PyBytes, 
PythonObjectWithCheckedDowncast};
+
+use python27_sys as ffi;
+
+use std;
+
+/// Takes ownership of the raw object pointer `p` and downcasts it to `T`.
+///
+/// Panics if `p` is NULL or if the checked downcast to `T` fails.
+#[inline]
+pub unsafe fn cast_from_owned_ptr_or_panic<T>(py: Python, p: *mut ffi::PyObject) -> T
+where
+    T: PythonObjectWithCheckedDowncast,
+{
+    if p.is_null() {
+        panic!("NULL pointer detected.");
+    }
+    let owned = PyObject::from_owned_ptr(py, p);
+    owned.cast_into(py).unwrap()
+}
+
+/// Allocates a Python bytes object of `len` bytes by calling
+/// `PyBytes_FromStringAndSize` with a NULL source pointer, i.e. without
+/// supplying initial contents; the caller is expected to fill the buffer.
+///
+/// Panics if `len` is not strictly positive or if the allocation returns
+/// NULL.
+pub fn pybytes_new_without_copying(py: Python, len: Py_ssize_t) -> PyBytes {
+    if len <= 0 {
+        panic!("the request bytes length should be > 0.")
+    }
+    unsafe {
+        let raw = ffi::PyBytes_FromStringAndSize(std::ptr::null(), len);
+        cast_from_owned_ptr_or_panic(py, raw)
+    }
+}
\ No newline at end of file
diff --git a/rust/hgcli/src/hgext/base85.rs b/rust/hgcli/src/hgext/base85.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/hgext/base85.rs
@@ -0,0 +1,317 @@
+use cpython::{PyObject, PyResult, Python, Py_ssize_t, PythonObject, PyBytes, 
PyErr, exc};
+use cpython::_detail::ffi;
+
+use std;
+use std::{mem, sync};
+use super::cpython_ext;
+
+const B85CHARS: &[u8; 85] = 
b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
+static mut B85DEC: [u8; 256] = [0; 256];
+static B85DEC_START: sync::Once = sync::ONCE_INIT;
+
+/// Builds the reverse lookup table `B85DEC` from `B85CHARS`, once.
+///
+/// Each alphabet byte maps to its 1-based position in `B85CHARS`; every other
+/// entry keeps its initial value 0.
+fn b85prep() {
+    B85DEC_START.call_once(|| {
+        for (position, &symbol) in B85CHARS.iter().enumerate() {
+            unsafe {
+                B85DEC[symbol as usize] = (position + 1) as u8;
+            }
+        }
+    });
+}
+
+/// Encodes the bytes of `text` with the `B85CHARS` alphabet and returns the
+/// result as a Python bytes object.
+///
+/// Every group of up to four input bytes is turned into five output
+/// characters. When `pad` is zero the result is trimmed to the minimal
+/// length for the input; otherwise all five characters of the last group are
+/// kept.
+///
+/// # Errors
+///
+/// Returns the pending Python exception if the output object cannot be
+/// allocated or resized.
+pub fn b85encode(py: Python, text: &str, pad: i32) -> PyResult<PyObject> {
+    let text = text.as_bytes();
+    let tlen = text.len() as Py_ssize_t;
+    let olen: Py_ssize_t = if pad != 0 {
+        ((tlen + 3) / 4 * 5) - 3
+    } else {
+        let rem = tlen % 4;
+        tlen / 4 * 5 + if rem > 0 { rem + 1 } else { 0 }
+    };
+    // The working buffer is `olen + 3` bytes, which is always large enough
+    // to hold a full five-character group for every input chunk.
+    let capacity = olen + 3;
+
+    // Keep the owning pointer in a local so that `_PyString_Resize` below can
+    // update it. The original code passed the address of a temporary copy of
+    // the pointer, leaving the owner stale if CPython moved the object.
+    let mut raw = unsafe { ffi::PyBytes_FromStringAndSize(std::ptr::null(), capacity) };
+    if raw.is_null() {
+        return Err(PyErr::fetch(py));
+    }
+
+    {
+        let dst = unsafe {
+            let buffer = ffi::PyBytes_AsString(raw) as *mut u8;
+            std::slice::from_raw_parts_mut(buffer, capacity as usize)
+        };
+
+        for (chunk, group) in text.chunks(4).zip(dst.chunks_mut(5)) {
+            // Pack up to four bytes big-endian into one 32-bit word.
+            let mut acc: u32 = 0;
+            for (k, &byte) in chunk.iter().enumerate() {
+                acc |= (byte as u32) << (24 - 8 * k);
+            }
+            // Emit the five base-85 digits, least significant one last.
+            for slot in group.iter_mut().rev() {
+                *slot = B85CHARS[(acc % 85) as usize];
+                acc /= 85;
+            }
+        }
+    }
+
+    if pad == 0 {
+        // On failure CPython has already released the object, stored NULL in
+        // `raw` and set an exception, so nothing is left to clean up here.
+        if unsafe { ffi::_PyString_Resize(&mut raw, olen) } != 0 {
+            return Err(PyErr::fetch(py));
+        }
+    }
+
+    Ok(unsafe { PyObject::from_owned_ptr(py, raw) })
+}
+
+/// Builds the `ValueError` reported for an invalid or overflowing character.
+fn bad_b85_char(py: Python, pos: usize) -> PyErr {
+    PyErr::new::<exc::ValueError, _>(py, format!("bad base85 character at position {}", pos))
+}
+
+/// Decodes base85 `text` (alphabet `B85CHARS`) and returns the decoded data
+/// as a Python bytes object.
+///
+/// Every group of five characters yields four bytes; a shorter final group
+/// of `k` characters yields `k - 1` bytes.
+///
+/// NOTE(review): as in the original implementation, one trailing NUL byte is
+/// stripped from the result. The tests in this file rely on that to
+/// round-trip padded input, but it also truncates data that legitimately
+/// ends in a zero byte - confirm this is intended.
+///
+/// # Errors
+///
+/// Returns `ValueError` if a character outside the alphabet is found or if a
+/// group does not fit in 32 bits, and the pending Python exception if the
+/// output object cannot be allocated or resized.
+pub fn b85decode(py: Python, text: &str) -> PyResult<PyObject> {
+    // Make sure the lookup table exists even when this function is called
+    // directly from Rust rather than through the Python module initializer.
+    b85prep();
+    // Borrow the table instead of copying all 256 bytes on every call; it is
+    // only written inside `b85prep`'s `Once` block.
+    let table: &[u8; 256] = unsafe { &B85DEC };
+
+    let text = text.as_bytes();
+    let len = text.len();
+    let tail = len % 5;
+    let olen_g = len / 5 * 4 + if tail > 0 { tail - 1 } else { 0 };
+
+    // Allocate directly so that a zero-length result (empty or one-character
+    // input) yields an empty object instead of a panic.
+    let out = unsafe {
+        let raw = ffi::PyBytes_FromStringAndSize(std::ptr::null(), olen_g as Py_ssize_t);
+        if raw.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+        PyObject::from_owned_ptr(py, raw)
+    };
+    let dst = unsafe {
+        let buffer = ffi::PyBytes_AsString(out.as_ptr()) as *mut u8;
+        std::slice::from_raw_parts_mut(buffer, olen_g)
+    };
+
+    let mut dst_off = 0;
+    let mut i = 0;
+    while i < len {
+        let mut acc: u32 = 0;
+
+        // All characters of the group except its last one; these cannot
+        // overflow 32 bits (at most four digits).
+        let lead = std::cmp::min(len - i - 1, 4);
+        for _ in 0..lead {
+            let digit = table[text[i] as usize];
+            if digit == 0 {
+                return Err(bad_b85_char(py, i));
+            }
+            acc = acc * 85 + (digit as u32 - 1);
+            i += 1;
+        }
+
+        // Last character of the group. `lead <= len - i - 1` guarantees that
+        // one more character is available here.
+        let digit = table[text[i] as usize];
+        i += 1;
+        if digit == 0 {
+            return Err(bad_b85_char(py, i));
+        }
+        let digit = digit as u32 - 1;
+        // Overflow detection: 0xffffffff == "|NsC0", "|NsC" == 0x03030303.
+        if acc > 0x03030303 {
+            return Err(bad_b85_char(py, i));
+        }
+        acc *= 85;
+        if acc > 0xffffffff_u32 - digit {
+            return Err(bad_b85_char(py, i));
+        }
+        acc += digit;
+
+        let cap = std::cmp::min(olen_g - dst_off, 4);
+
+        // Scale a short final group up to a full five-digit value. The
+        // reference C code relies on unsigned wrap-around here, so wrapping
+        // arithmetic is used to avoid overflow panics in debug builds.
+        for _ in 0..(4 - cap) {
+            acc = acc.wrapping_mul(85);
+        }
+        if cap > 0 && cap < 4 {
+            acc = acc.wrapping_add(0xffffff >> (cap - 1) * 8);
+        }
+
+        // Emit the bytes most significant first.
+        for slot in dst[dst_off..dst_off + cap].iter_mut() {
+            acc = acc.rotate_left(8);
+            *slot = acc as u8;
+        }
+        dst_off += cap;
+    }
+
+    if olen_g > 0 && dst[olen_g - 1] == b'\0' {
+        // Hand the real owning pointer to `_PyString_Resize` so that a moved
+        // object is tracked. On failure CPython has already released the
+        // object and set an exception.
+        let mut raw = out.steal_ptr();
+        if unsafe { ffi::_PyString_Resize(&mut raw, (olen_g - 1) as Py_ssize_t) } != 0 {
+            return Err(PyErr::fetch(py));
+        }
+        return Ok(unsafe { PyObject::from_owned_ptr(py, raw) });
+    }
+
+    Ok(out)
+}
+
+// Declares the `oxidized_base85` Python module. `initoxidized_base85` is the
+// entry point invoked by `init_all_hg_ext`; `PyInit_oxidized_base85` is
+// presumably the Python 3 counterpart - TODO confirm against rust-cpython.
+// The decode table is built first, then the docstring and the two functions
+// are registered on the module.
+py_module_initializer!(oxidized_base85, initoxidized_base85, 
PyInit_oxidized_base85, |py, m| {
+    b85prep();
+    m.add(py, "__doc__", "Oxidized base85 module")?;
+    m.add(py, "b85encode", py_fn!(py, b85encode(text: &str, pad: i32)))?;
+    m.add(py, "b85decode", py_fn!(py, b85decode(text: &str)))?;
+    Ok(())
+});
+
+#[cfg(test)]
+mod test {
+    use hgext;
+    use cpython::Python;
+
+    /// Multi-byte UTF-8 sample shared by the non-ASCII tests.
+    const CHINESE: &str = "这是一个测试的例子";
+
+    /// Encodes `input` twice - by calling the Rust function directly and by
+    /// calling it through the imported `oxidized_base85` Python module - and
+    /// checks both results against `expected`.
+    fn check_encode(input: &str, pad: i32, expected: &str) {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let direct: String = super::b85encode(py, input, pad).unwrap().extract(py).unwrap();
+        assert_eq!(direct, expected);
+
+        let module = py.import("oxidized_base85").unwrap();
+        let via_python: String = module
+            .call(py, "b85encode", (input, pad), None)
+            .unwrap()
+            .extract(py)
+            .unwrap();
+        assert_eq!(via_python, expected);
+    }
+
+    /// Decodes `encoded` twice - directly and through the imported Python
+    /// module - and checks both results against `expected`.
+    fn check_decode(encoded: &str, expected: &str) {
+        hgext::set_py_env();
+
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        hgext::init_all_hg_ext(py);
+
+        let direct: String = super::b85decode(py, encoded).unwrap().extract(py).unwrap();
+        assert_eq!(direct, expected);
+
+        let module = py.import("oxidized_base85").unwrap();
+        let via_python: String = module
+            .call(py, "b85decode", (encoded,), None)
+            .unwrap()
+            .extract(py)
+            .unwrap();
+        assert_eq!(via_python, expected);
+    }
+
+    #[test]
+    fn test_encoder_abc_pad() {
+        check_encode("abc", 1, "VPazd");
+    }
+
+    #[test]
+    fn test_encoder_chinese_pad() {
+        check_encode(CHINESE, 1, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kqa");
+    }
+
+    #[test]
+    fn test_encoder_abc_no_pad() {
+        check_encode("abc", 0, "VPaz");
+    }
+
+    #[test]
+    fn test_encoder_chinese_no_pad() {
+        check_encode(CHINESE, 0, "=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kq");
+    }
+
+    #[test]
+    fn test_decoder_abc_no_pad() {
+        check_decode("VPaz", "abc");
+    }
+
+    #[test]
+    fn test_decoder_abc_pad() {
+        check_decode("VPazd", "abc");
+    }
+
+    #[test]
+    fn test_decoder_chinese_pad() {
+        check_decode("=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kqa", CHINESE);
+    }
+
+    #[test]
+    fn test_decoder_chinese_no_pad() {
+        check_decode("=)alfn6KoxfaJKU=CzCHua)PTgyg=9<*kq", CHINESE);
+    }
+}



To: Ivzhh, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to