This is an automated email from the ASF dual-hosted git repository. todd pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 214f61a180580e56019af141ded3b196ba5e10cf Author: Philip Zeyliger <phi...@cloudera.com> AuthorDate: Mon Feb 25 15:48:27 2019 -0800 IMPALA-8250: Clean up JNI warnings. Using LIBHDFS_OPTS+="-Xcheck:jni" revealed a handful of warnings related to (a) checking for exceptions and (b) leaking local references. Checking for exceptions required sprinkling RETURN_ERROR_IF_EXC left and right. I chose not to expand the JniCall infrastructure to handle this more generally at the moment. The leaky local references are a bit harder. In the logs, they show up as "WARNING: JNI local refs: 2597, exceeds capacity: 35" or similar. A few of these errors seem to be not in our code. The ones that I've found in our code stemmed from HBaseTableScanner::GetRowKey(): this method uses local references and wasn't returning them. Using a JniLocalFrame seems to have taken care of the warnings. I have added code to skip test_large_strings when JNI checking is enabled. This test takes forever (presumably because JNI is checking bounds on strings very aggressively), and times out. The time out also causes some metric-related checks to fail (since a query is still in flight). Debugging this required customizing my JDK to give stack traces when these warnings occurred. The following diff facilitated this. diff -r 76a9c9cf14f1 src/share/vm/prims/jniCheck.cpp --- a/src/share/vm/prims/jniCheck.cpp Tue Jan 15 10:43:31 2019 +0000 +++ b/src/share/vm/prims/jniCheck.cpp Wed Feb 27 11:57:13 2019 -0800 @@ -143,11 +143,30 @@ static const char * fatal_instance_field_mismatch = "Field type (instance) mismatch in JNI get/set field operations"; static const char * fatal_non_string = "JNI string operation received a non-string"; +// thisone: whether to print every time, or maybe, depending on future +// how many future stacks we want printed (totally racy); helps catch +// missing exception handling if there's a way to tickle that code +// reliably. +static inline void dump_native_stack(JavaThread* thr, bool thisone, int future) { + static int fut_stacks = 0; // racy! + if (fut_stacks > 0) { + thisone = true; + fut_stacks--; + } + if (future > 0) fut_stacks = future; + if (thisone) { + frame fr = os::current_frame(); + char buf[6000]; + tty->print_cr("Thread: %s %d", thr->get_thread_name(), thr->osthread()->thread_id()); + print_native_stack(tty, fr, thr, buf, sizeof(buf)); + } +} // When in VM state: static void ReportJNIWarning(JavaThread* thr, const char *msg) { tty->print_cr("WARNING in native method: %s", msg); thr->print_stack(); + dump_native_stack(thr, true, 0); } // When in NATIVE state: @@ -199,11 +218,14 @@ tty->print_cr("WARNING in native method: JNI call made without checking exceptions when required to from %s", thr->get_pending_jni_exception_check()); thr->print_stack(); + dump_native_stack(thr, true, 10); ) thr->clear_pending_jni_exception_check(); // Just complain once } } + + /** * Add to the planned number of handles. I.e. plus current live & warning threshold */ @@ -254,9 +276,12 @@ tty->print_cr("WARNING: JNI local refs: %zu, exceeds capacity: %zu", live_handles, planned_capacity); thr->print_stack(); + dump_native_stack(thr, true, 0); ) // Complain just the once, reset to current + warn threshold add_planned_handle_capacity(handles, 0); + } else { + dump_native_stack(thr, false, 0); } } Change-Id: Idd1709f749a764c1d947704bc64306493863b45f Reviewed-on: http://gerrit.cloudera.org:8080/12660 Reviewed-by: Joe McDonnell <joemcdonn...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/catalog/catalog.cc | 1 + be/src/exec/hbase-table-scanner.cc | 41 ++++++++++++++++++++++++++++++++--- be/src/exprs/hive-udf-call.cc | 2 ++ be/src/runtime/hbase-table-factory.cc | 3 +++ be/src/service/frontend.cc | 11 +++++++--- be/src/service/frontend.h | 3 --- tests/query_test/test_insert.py | 4 +++- 7 files changed, 55 insertions(+), 10 deletions(-) diff --git a/be/src/catalog/catalog.cc b/be/src/catalog/catalog.cc index 8efebd9..c6becc3 100644 --- a/be/src/catalog/catalog.cc +++ b/be/src/catalog/catalog.cc @@ -107,6 +107,7 @@ Status Catalog::GetCatalogVersion(long* version) { JniLocalFrame jni_frame; RETURN_IF_ERROR(jni_frame.push(jni_env)); *version = jni_env->CallLongMethod(catalog_, get_catalog_version_id_); + RETURN_ERROR_IF_EXC(jni_env); return Status::OK(); } diff --git a/be/src/exec/hbase-table-scanner.cc b/be/src/exec/hbase-table-scanner.cc index 4746930..d0b072c 100644 --- a/be/src/exec/hbase-table-scanner.cc +++ b/be/src/exec/hbase-table-scanner.cc @@ -253,6 +253,7 @@ Status HBaseTableScanner::Init() { env->GetStaticFieldID(hconstants_cl_, "EMPTY_START_ROW", "[B"); RETURN_ERROR_IF_EXC(env); empty_row_ = env->GetStaticObjectField(hconstants_cl_, empty_start_row_id); + RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, empty_row_, &empty_row_)); // FilterList method ids. @@ -268,6 +269,7 @@ Status HBaseTableScanner::Init() { "Lorg/apache/hadoop/hbase/filter/FilterList$Operator;"); RETURN_ERROR_IF_EXC(env); must_pass_all_op_ = env->GetStaticObjectField(filter_list_op_cl_, must_pass_all_id); + RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, must_pass_all_op_, &must_pass_all_op_)); // SingleColumnValueFilter method ids. @@ -282,6 +284,7 @@ Status HBaseTableScanner::Init() { RETURN_ERROR_IF_EXC(env); compare_ops_ = (jobjectArray) env->CallStaticObjectMethod(compare_op_cl_, compare_op_values); + RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, reinterpret_cast<jobject>(compare_ops_), reinterpret_cast<jobject*>(&compare_ops_))); @@ -425,10 +428,12 @@ Status HBaseTableScanner::HandleResultScannerTimeout(JNIEnv* env, bool* timeout) RETURN_IF_ERROR(jni_frame.push(env)); DCHECK_LT(cell_index_, num_cells_); jobject cell = env->GetObjectArrayElement(cells_, cell_index_); + RETURN_ERROR_IF_EXC(env); // Specifically set the start_bytes to the next row since some of them were already // read jbyteArray start_bytes = (jbyteArray) env->CallObjectMethod(cell, cell_get_row_array_); + RETURN_ERROR_IF_EXC(env); jbyteArray end_bytes; RETURN_IF_ERROR(CreateByteArray(env, scan_range.stop_key(), &end_bytes)); return InitScanRange(env, start_bytes, end_bytes); @@ -510,6 +515,7 @@ Status HBaseTableScanner::Next(JNIEnv* env, bool* has_next) { DCHECK(resultscanner_ != NULL); // result_ = resultscanner_.next(); result = env->CallObjectMethod(resultscanner_, resultscanner_next_id_); + RETURN_ERROR_IF_EXC(env); // Normally we would check for a JNI exception via RETURN_ERROR_IF_EXC, but we // need to also check for scanner timeouts and handle them specially, which is // done by HandleResultScannerTimeout(). If a timeout occurred, then it will @@ -530,9 +536,12 @@ Status HBaseTableScanner::Next(JNIEnv* env, bool* has_next) { } // Ignore empty rows - if (result != NULL && - JNI_TRUE == env->CallBooleanMethod(result, result_isempty_id_)) { - continue; + if (result != NULL) { + bool isEmpty = JNI_TRUE == env->CallBooleanMethod(result, result_isempty_id_); + RETURN_ERROR_IF_EXC(env); + if (isEmpty) { + continue; + } } break; } @@ -547,6 +556,7 @@ Status HBaseTableScanner::Next(JNIEnv* env, bool* has_next) { // cells_ = result.raw(); jobject local_cells = reinterpret_cast<jobjectArray>( env->CallObjectMethod(result, result_raw_cells_id_)); + RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, local_cells, &cells_)); num_cells_ = env->GetArrayLength(cells_); // Check that raw() didn't return more cells than expected. @@ -579,10 +589,15 @@ inline void HBaseTableScanner::WriteTupleSlot(const SlotDescriptor* slot_desc, inline Status HBaseTableScanner::GetRowKey(JNIEnv* env, jobject cell, void** data, int* length) { + JniLocalFrame frame; + RETURN_IF_ERROR(frame.push(env)); int offset = env->CallIntMethod(cell, cell_get_row_offset_id_); + RETURN_ERROR_IF_EXC(env); *length = env->CallShortMethod(cell, cell_get_row_length_id_); + RETURN_ERROR_IF_EXC(env); jbyteArray jdata = (jbyteArray) env->CallObjectMethod(cell, cell_get_row_array_); + RETURN_ERROR_IF_EXC(env); *data = value_pool_->TryAllocate(*length); if (UNLIKELY(*data == NULL)) { string details = Substitute(HBASE_MEM_LIMIT_EXCEEDED, "GetRowKey", @@ -590,6 +605,7 @@ inline Status HBaseTableScanner::GetRowKey(JNIEnv* env, jobject cell, return value_pool_->mem_tracker()->MemLimitExceeded(state_, details, *length); } env->GetByteArrayRegion(jdata, offset, *length, reinterpret_cast<jbyte*>(*data)); + RETURN_ERROR_IF_EXC(env); COUNTER_ADD(scan_node_->bytes_read_counter(), *length); return Status::OK(); } @@ -597,9 +613,12 @@ inline Status HBaseTableScanner::GetRowKey(JNIEnv* env, jobject cell, inline Status HBaseTableScanner::GetFamily(JNIEnv* env, jobject cell, void** data, int* length) { int offset = env->CallIntMethod(cell, cell_get_family_offset_id_); + RETURN_ERROR_IF_EXC(env); *length = env->CallShortMethod(cell, cell_get_family_length_id_); + RETURN_ERROR_IF_EXC(env); jbyteArray jdata = (jbyteArray) env->CallObjectMethod(cell, cell_get_family_array_); + RETURN_ERROR_IF_EXC(env); *data = value_pool_->TryAllocate(*length); if (UNLIKELY(*data == NULL)) { string details = Substitute(HBASE_MEM_LIMIT_EXCEEDED, "GetFamily", @@ -607,6 +626,7 @@ inline Status HBaseTableScanner::GetFamily(JNIEnv* env, jobject cell, return value_pool_->mem_tracker()->MemLimitExceeded(state_, details, *length); } env->GetByteArrayRegion(jdata, offset, *length, reinterpret_cast<jbyte*>(*data)); + RETURN_ERROR_IF_EXC(env); COUNTER_ADD(scan_node_->bytes_read_counter(), *length); return Status::OK(); } @@ -614,9 +634,12 @@ inline Status HBaseTableScanner::GetFamily(JNIEnv* env, jobject cell, inline Status HBaseTableScanner::GetQualifier(JNIEnv* env, jobject cell, void** data, int* length) { int offset = env->CallIntMethod(cell, cell_get_qualifier_offset_id_); + RETURN_ERROR_IF_EXC(env); *length = env->CallIntMethod(cell, cell_get_qualifier_length_id_); + RETURN_ERROR_IF_EXC(env); jbyteArray jdata = (jbyteArray) env->CallObjectMethod(cell, cell_get_qualifier_array_); + RETURN_ERROR_IF_EXC(env); *data = value_pool_->TryAllocate(*length); if (UNLIKELY(*data == NULL)) { string details = Substitute(HBASE_MEM_LIMIT_EXCEEDED, "GetQualifier", @@ -624,6 +647,7 @@ inline Status HBaseTableScanner::GetQualifier(JNIEnv* env, jobject cell, return value_pool_->mem_tracker()->MemLimitExceeded(state_, details, *length); } env->GetByteArrayRegion(jdata, offset, *length, reinterpret_cast<jbyte*>(*data)); + RETURN_ERROR_IF_EXC(env); COUNTER_ADD(scan_node_->bytes_read_counter(), *length); return Status::OK(); } @@ -631,9 +655,12 @@ inline Status HBaseTableScanner::GetQualifier(JNIEnv* env, jobject cell, inline Status HBaseTableScanner::GetValue(JNIEnv* env, jobject cell, void** data, int* length) { int offset = env->CallIntMethod(cell, cell_get_value_offset_id_); + RETURN_ERROR_IF_EXC(env); *length = env->CallIntMethod(cell, cell_get_value_length_id_); + RETURN_ERROR_IF_EXC(env); jbyteArray jdata = (jbyteArray) env->CallObjectMethod(cell, cell_get_value_array_); + RETURN_ERROR_IF_EXC(env); *data = value_pool_->TryAllocate(*length); if (UNLIKELY(*data == NULL)) { string details = Substitute(HBASE_MEM_LIMIT_EXCEEDED, "GetValue", @@ -641,12 +668,16 @@ inline Status HBaseTableScanner::GetValue(JNIEnv* env, jobject cell, return value_pool_->mem_tracker()->MemLimitExceeded(state_, details, *length); } env->GetByteArrayRegion(jdata, offset, *length, reinterpret_cast<jbyte*>(*data)); + RETURN_ERROR_IF_EXC(env); COUNTER_ADD(scan_node_->bytes_read_counter(), *length); return Status::OK(); } Status HBaseTableScanner::GetRowKey(JNIEnv* env, void** key, int* key_length) { + JniLocalFrame jni_frame; + RETURN_IF_ERROR(jni_frame.push(env)); jobject cell = env->GetObjectArrayElement(cells_, 0); + RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(GetRowKey(env, cell, key, key_length)); RETURN_ERROR_IF_EXC(env); return Status::OK(); @@ -654,9 +685,12 @@ Status HBaseTableScanner::GetRowKey(JNIEnv* env, void** key, int* key_length) { Status HBaseTableScanner::GetRowKey(JNIEnv* env, const SlotDescriptor* slot_desc, Tuple* tuple) { + JniLocalFrame jni_frame; + RETURN_IF_ERROR(jni_frame.push(env)); void* key; int key_length; jobject cell = env->GetObjectArrayElement(cells_, 0); + RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(GetRowKey(env, cell, &key, &key_length)); DCHECK_EQ(key_length, slot_desc->type().GetByteSize()); WriteTupleSlot(slot_desc, tuple, reinterpret_cast<char*>(key)); @@ -674,6 +708,7 @@ Status HBaseTableScanner::GetCurrentValue(JNIEnv* env, const string& family, JniLocalFrame jni_frame; RETURN_IF_ERROR(jni_frame.push(env)); jobject cell = env->GetObjectArrayElement(cells_, cell_index_); + RETURN_ERROR_IF_EXC(env); if (!all_cells_present_) { // Check family. If it doesn't match, we have a NULL value. void* family_data; diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc index 3d24671..ea1827d 100644 --- a/be/src/exprs/hive-udf-call.cc +++ b/be/src/exprs/hive-udf-call.cc @@ -253,6 +253,8 @@ void HiveUdfCall::CloseEvaluator(FunctionContext::FunctionStateScope scope, if (jni_ctx->executor != NULL) { env->CallNonvirtualVoidMethodA( jni_ctx->executor, executor_cl_, executor_close_id_, NULL); + Status s = JniUtil::GetJniExceptionMsg(env); + if (!s.ok()) state->LogError(s.msg()); env->DeleteGlobalRef(jni_ctx->executor); } if (jni_ctx->input_values_buffer != NULL) { diff --git a/be/src/runtime/hbase-table-factory.cc b/be/src/runtime/hbase-table-factory.cc index 7340473..6a8b77e 100644 --- a/be/src/runtime/hbase-table-factory.cc +++ b/be/src/runtime/hbase-table-factory.cc @@ -92,6 +92,9 @@ HBaseTableFactory::~HBaseTableFactory() { lock_guard<mutex> lock(connection_lock_); if (connection_ != NULL) { env->CallObjectMethod(connection_, connection_close_id_); + Status s = JniUtil::GetJniExceptionMsg(env); + // Not much we can do with the error except log it. + if (!s.ok()) LOG(INFO) << "Exception when cleaning up HBase " << s; env->DeleteGlobalRef(connection_); connection_ = NULL; } diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc index 1df77d4..fdf195f 100644 --- a/be/src/service/frontend.cc +++ b/be/src/service/frontend.cc @@ -107,19 +107,22 @@ Frontend::Frontend() { }; JNIEnv* jni_env = getJNIEnv(); + JniLocalFrame jni_frame; + ABORT_IF_ERROR(jni_frame.push(jni_env)); + // create instance of java class JniFrontend - fe_class_ = jni_env->FindClass("org/apache/impala/service/JniFrontend"); + jclass fe_class = jni_env->FindClass("org/apache/impala/service/JniFrontend"); ABORT_IF_EXC(jni_env); uint32_t num_methods = sizeof(methods) / sizeof(methods[0]); for (int i = 0; i < num_methods; ++i) { - ABORT_IF_ERROR(JniUtil::LoadJniMethod(jni_env, fe_class_, &(methods[i]))); + ABORT_IF_ERROR(JniUtil::LoadJniMethod(jni_env, fe_class, &(methods[i]))); }; jbyteArray cfg_bytes; ABORT_IF_ERROR(GetThriftBackendGflags(jni_env, &cfg_bytes)); - jobject fe = jni_env->NewObject(fe_class_, fe_ctor_, cfg_bytes); + jobject fe = jni_env->NewObject(fe_class, fe_ctor_, cfg_bytes); ABORT_IF_EXC(jni_env); ABORT_IF_ERROR(JniUtil::LocalToGlobalRef(jni_env, fe, &fe_)); } @@ -263,6 +266,7 @@ bool Frontend::IsAuthorizationError(const Status& status) { void Frontend::SetCatalogIsReady() { JNIEnv* jni_env = getJNIEnv(); jni_env->CallVoidMethod(fe_, set_catalog_is_ready_id_); + ABORT_IF_EXC(jni_env); } void Frontend::WaitForCatalog() { @@ -273,6 +277,7 @@ void Frontend::WaitForCatalog() { #endif JNIEnv* jni_env = getJNIEnv(); jni_env->CallVoidMethod(fe_, wait_for_catalog_id_); + ABORT_IF_EXC(jni_env); } Status Frontend::GetTableFiles(const TShowFilesParams& params, TResultSet* result) { diff --git a/be/src/service/frontend.h b/be/src/service/frontend.h index 3788fa5..abcc6c3 100644 --- a/be/src/service/frontend.h +++ b/be/src/service/frontend.h @@ -186,9 +186,6 @@ class Frontend { TDescriptorTable* result); private: - /// Descriptor of Java Frontend class itself, used to create a new instance. - jclass fe_class_; - jobject fe_; // instance of org.apache.impala.service.JniFrontend jmethodID create_exec_request_id_; // JniFrontend.createExecRequest() jmethodID get_explain_plan_id_; // JniFrontend.getExplainPlan() diff --git a/tests/query_test/test_insert.py b/tests/query_test/test_insert.py index 08abbfa..e850e24 100644 --- a/tests/query_test/test_insert.py +++ b/tests/query_test/test_insert.py @@ -17,6 +17,7 @@ # Targeted Impala insert tests +import os import pytest from testdata.common import widetable @@ -81,7 +82,8 @@ class TestInsertQueries(ImpalaTestSuite): @pytest.mark.execute_serially def test_insert_large_string(self, vector, unique_database): """Test handling of large strings in inserter and scanner.""" - table_format = vector.get_value('table_format') + if "-Xcheck:jni" in os.environ.get("LIBHDFS_OPTS", ""): + pytest.skip("Test unreasonably slow with JNI checking.") table_name = unique_database + ".insert_largestring" file_format = vector.get_value('table_format').file_format