HappenLee commented on code in PR #57329: URL: https://github.com/apache/doris/pull/57329#discussion_r2471954079
########## be/src/udf/python/python_env.cpp: ########## @@ -0,0 +1,299 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "python_env.h" + +#include <fmt/core.h> + +#include <filesystem> +#include <memory> +#include <regex> +#include <vector> + +#include "common/status.h" +#include "udf/python/python_udf_server.h" +#include "util/string_util.h" + +namespace doris { + +namespace fs = std::filesystem; + +// extract python version by executing `python --version` and extract "3.9.16" from "Python 3.9.16" +// @param python_path: path to python executable, e.g. "/opt/miniconda3/envs/myenv/bin/python" +// @param version: extracted python version, e.g. 
"3.9.16" +static Status extract_python_version(const std::string& python_path, std::string* version) { + static std::regex python_version_re(R"(^Python (\d+\.\d+\.\d+))"); + + if (!fs::exists(python_path)) { + return Status::NotFound("Python executable not found: {}", python_path); + } + + std::string cmd = fmt::format("\"{}\" --version", python_path); + FILE* pipe = popen(cmd.c_str(), "r"); + if (!pipe) { + return Status::InternalError("Failed to run: {}", cmd); + } + + std::string result; + char buf[128]; + while (fgets(buf, sizeof(buf), pipe)) { + result += buf; + } + pclose(pipe); + + std::smatch match; + if (std::regex_search(result, match, python_version_re)) { + *version = match[1].str(); + return Status::OK(); + } + + return Status::InternalError("Failed to extract Python version from path: {}, result: {}", + python_path, result); +} + +PythonEnvironment::PythonEnvironment(const std::string& name, const PythonVersion& python_version) + : env_name(name), python_version(python_version) {} + +std::string PythonEnvironment::to_string() const { + return fmt::format( + "[env_name: {}, env_base_path: {}, python_base_path: {}, python_full_version: {}, " + "python_major_minor_version: {}]", + env_name, python_version.base_path, python_version.executable_path, + python_version.full_version, python_version.version); +} + +bool PythonEnvironment::is_valid() const { + if (!python_version.is_valid()) return false; + + auto perms = fs::status(python_version.executable_path).permissions(); + if ((perms & fs::perms::owner_exec) == fs::perms::none) { + return false; + } + + std::string version; + if (!extract_python_version(python_version.executable_path, &version).ok()) { + LOG(WARNING) << "Failed to extract python version from path: " + << python_version.executable_path; + return false; + } + + return python_version.full_version == version && + python_version.version == version.substr(0, version.find_last_of('.')); +} + +// Scan for environments under the 
/{conda_root_path}/envs directory from the conda root. +Status PythonEnvironment::scan_from_conda_root_path(const fs::path& conda_root_path, + std::vector<PythonEnvironment>* environments) { + DCHECK(!conda_root_path.empty() && environments != nullptr); + + fs::path envs_dir = conda_root_path / "envs"; + if (!fs::exists(envs_dir) || !fs::is_directory(envs_dir)) { + return Status::NotFound("Conda envs directory not found: {}", envs_dir.string()); + } + + for (const auto& entry : fs::directory_iterator(envs_dir)) { + if (!entry.is_directory()) continue; + + std::string env_name = entry.path().filename(); // e.g. "myenv" + std::string env_base_path = entry.path(); // e.g. "/opt/miniconda3/envs/myenv" + std::string python_path = + env_base_path + "/bin/python"; // e.g. "/{env_base_path}/bin/python" + std::string python_full_version; // e.g. "3.9.16" + RETURN_IF_ERROR(extract_python_version(python_path, &python_full_version)); + size_t pos = python_full_version.find_last_of('.'); + + if (UNLIKELY(pos == std::string::npos)) { + return Status::InvalidArgument("Invalid python version: {}", python_full_version); + } + + std::string python_major_minor_version = python_full_version.substr(0, pos); // e.g. 
"3.9" + PythonVersion python_version(python_major_minor_version, python_full_version, env_base_path, + python_path); + PythonEnvironment conda_env(env_name, python_version); + + if (UNLIKELY(!conda_env.is_valid())) { + LOG(WARNING) << "Invalid conda environment: " << conda_env.to_string(); + continue; + } + + environments->push_back(std::move(conda_env)); + } + + return Status::OK(); +} + +Status PythonEnvironment::scan_from_venv_root_path( + const fs::path& venv_root_path, const std::vector<std::string>& interpreter_paths, + std::vector<PythonEnvironment>* environments) { + DCHECK(!venv_root_path.empty() && environments != nullptr); + + for (const auto& interpreter_path : interpreter_paths) { + if (!fs::exists(interpreter_path) || !fs::is_regular_file(interpreter_path)) { + return Status::NotFound("Interpreter path not found: {}", interpreter_path); + } + std::string python_full_version; + RETURN_IF_ERROR(extract_python_version(interpreter_path, &python_full_version)); + size_t pos = python_full_version.find_last_of('.'); + if (UNLIKELY(pos == std::string::npos)) { + return Status::InvalidArgument("Invalid python version: {}", python_full_version); + } + std::string env_name = fmt::format("python{}", python_full_version); // e.g. "python3.9.16" + std::string env_base_path = fmt::format("{}/{}", venv_root_path.string(), + env_name); // e.g. "/opt/venv/python3.9.16" + std::string python_path = + fmt::format("{}/bin/python", env_base_path); // e.g. "/{venv_base_path}/bin/python" + + if (!fs::exists(env_base_path) || !fs::exists(python_path)) { + fs::create_directories(env_base_path); + std::string create_venv_cmd = + fmt::format("{} -m venv --copies {}", interpreter_path, env_base_path); + + if (system(create_venv_cmd.c_str()) != 0 || !fs::exists(python_path)) { + return Status::RuntimeError("Failed to create python virtual environment, cmd: {}", + create_venv_cmd); + } + } + + std::string python_major_minor_version = python_full_version.substr(0, pos); // e.g. 
"3.9" + std::string python_dependency_path = fmt::format( + "{}/lib/python{}/site-packages", env_base_path, + python_major_minor_version); // e.g. "/{venv_base_path}/lib/python3.9/site-packages" + + if (!fs::exists(python_dependency_path)) { + return Status::NotFound("Python dependency path not found: {}", python_dependency_path); + } + + PythonVersion python_version(python_major_minor_version, python_full_version, env_base_path, + python_path); + PythonEnvironment venv_env(env_name, python_version); + + if (UNLIKELY(!venv_env.is_valid())) { + LOG(WARNING) << "Invalid venv environment: " << venv_env.to_string(); + continue; + } + + environments->push_back(std::move(venv_env)); + } + + return Status::OK(); +} + +Status PythonEnvScanner::get_versions(std::vector<PythonVersion>* versions) const { + DCHECK(versions != nullptr); + if (_envs.empty()) { + return Status::InternalError("not found available version"); + } + for (const auto& env : _envs) { + versions->push_back(env.python_version); + } + return Status::OK(); +} + +Status PythonEnvScanner::get_version(const std::string& runtime_version, + PythonVersion* version) const { + if (_envs.empty()) { + return Status::InternalError("not found available version"); + } + std::string_view runtime_version_view(runtime_version); + runtime_version_view = trim(runtime_version_view); + for (const auto& env : _envs) { + if (env.python_version.full_version.starts_with(runtime_version_view)) { + *version = env.python_version; + return Status::OK(); + } + } + return Status::NotFound("not found runtime version: {}", runtime_version); +} + +Status PythonEnvScanner::default_version(PythonVersion* version) const { + if (_envs.empty()) { + return Status::InternalError("not found available version"); + } + *version = _envs.begin()->python_version; + return Status::OK(); +} + +Status CondaEnvScanner::scan() { + RETURN_IF_ERROR(PythonEnvironment::scan_from_conda_root_path(_env_root_path, &_envs)); + if (_envs.empty()) { + return 
Status::NotFound("No conda environments found"); Review Comment: Why not return this error status from inside the function called on line 231, so that this function can just do ``` return PythonEnvironment::scan_from_conda_root_path(_env_root_path, &_envs); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
