This is an automated email from the ASF dual-hosted git repository.
technoboy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pulsar.git
The following commit(s) were added to refs/heads/master by this push:
new 5dcc9b0c3b0 [fix][broker] Fix incorrect Nic usage collected by pulsar
(#18882)
5dcc9b0c3b0 is described below
commit 5dcc9b0c3b07b88c4b744a0b5fbf3f59331f803d
Author: thetumbled <[email protected]>
AuthorDate: Wed Dec 14 11:33:18 2022 +0800
[fix][broker] Fix incorrect Nic usage collected by pulsar (#18882)
---
.../pulsar/broker/loadbalance/LinuxInfoUtils.java | 57 ++++++++++++++++++++--
.../loadbalance/impl/LinuxBrokerHostUsageImpl.java | 4 +-
.../loadbalance/LoadReportNetworkLimitTest.java | 8 +--
3 files changed, 59 insertions(+), 10 deletions(-)
diff --git
a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java
b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java
index 2119398fe62..42ef264b6db 100644
---
a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java
+++
b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java
@@ -26,6 +26,7 @@ import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.Locale;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -163,6 +164,30 @@ public class LinuxInfoUtils {
}
}
+ /**
+ * Determine whether nic is usable.
+ * @param nicPath Nic path
+ * @return whether nic is usable.
+ */
+ private static boolean isUsable(Path nicPath) {
+ try {
+ String operstate = readTrimStringFromFile(nicPath.resolve("operstate"));
+ Operstate operState = Operstate.valueOf(operstate.toUpperCase(Locale.ROOT));
+ switch (operState) {
+ case UP:
+ case UNKNOWN:
+ case DORMANT:
+ return true;
+ default:
+ return false;
+ }
+ } catch (Exception e) {
+ log.warn("[LinuxInfo] Failed to read {} NIC operstate, the detail is: {}", nicPath, e.getMessage());
+ // Read operstate got error.
+ return false;
+ }
+ }
+
/**
* Get all physical nic limit.
* @param nics All nic path
@@ -199,12 +224,13 @@ public class LinuxInfoUtils {
}
/**
- * Get all physical nic path.
- * @return All physical nic path
+ * Get paths of all usable physical nic.
+ * @return All usable physical nic paths.
*/
- public static List<String> getPhysicalNICs() {
+ public static List<String> getUsablePhysicalNICs() {
try (Stream<Path> stream = Files.list(Paths.get(NIC_PATH))) {
return stream.filter(LinuxInfoUtils::isPhysicalNic)
+ .filter(LinuxInfoUtils::isUsable)
.map(path -> path.getFileName().toString())
.collect(Collectors.toList());
} catch (IOException e) {
@@ -218,7 +244,7 @@ public class LinuxInfoUtils {
* @return Whether the VM has nic speed
*/
public static boolean checkHasNicSpeeds() {
- List<String> physicalNICs = getPhysicalNICs();
+ List<String> physicalNICs = getUsablePhysicalNICs();
if (CollectionUtils.isEmpty(physicalNICs)) {
return false;
}
@@ -242,6 +268,29 @@ public class LinuxInfoUtils {
return Double.parseDouble(readTrimStringFromFile(path));
}
+ /**
+ * TLV IFLA_OPERSTATE
+ * contains RFC2863 state of the interface in numeric representation:
+ * See <a href="https://www.kernel.org/doc/Documentation/networking/operstates.txt">...</a>
+ */
+ enum Operstate {
+ // Interface is in unknown state, neither driver nor userspace has set
+ // operational state. Interface must be considered for user data as
+ // setting operational state has not been implemented in every driver.
+ UNKNOWN,
+ // Interface is unable to transfer data on L1, f.e. ethernet is not
+ // plugged or interface is ADMIN down.
+ DOWN,
+ // Interfaces stacked on an interface that is IF_OPER_DOWN show this
+ // state (f.e. VLAN).
+ LOWERLAYERDOWN,
+ // Interface is L1 up, but waiting for an external event, f.e. for a
+ // protocol to establish. (802.1X)
+ DORMANT,
+ // Interface is operational up and can be used.
+ UP
+ }
+
@AllArgsConstructor
public enum NICUsageType {
// transport
diff --git
a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java
b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java
index 9e920c01a30..318f37f7f7a 100644
---
a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java
+++
b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/impl/LinuxBrokerHostUsageImpl.java
@@ -21,10 +21,10 @@ package org.apache.pulsar.broker.loadbalance.impl;
import static org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.NICUsageType;
import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.getCpuUsageForCGroup;
import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.getCpuUsageForEntireHost;
-import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.getPhysicalNICs;
import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.getTotalCpuLimit;
import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.getTotalNicLimit;
import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.getTotalNicUsage;
+import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.getUsablePhysicalNICs;
import static
org.apache.pulsar.broker.loadbalance.LinuxInfoUtils.isCGroupEnabled;
import static
org.apache.pulsar.common.util.Runnables.catchingAndLoggingThrowables;
import com.google.common.annotations.VisibleForTesting;
@@ -88,7 +88,7 @@ public class LinuxBrokerHostUsageImpl implements BrokerHostUsage {
@Override
public void calculateBrokerHostUsage() {
- List<String> nics = getPhysicalNICs();
+ List<String> nics = getUsablePhysicalNICs();
double totalNicLimit = getTotalNicLimitWithConfiguration(nics);
double totalNicUsageTx = getTotalNicUsage(nics, NICUsageType.TX,
BitRateUnit.Kilobit);
double totalNicUsageRx = getTotalNicUsage(nics, NICUsageType.RX,
BitRateUnit.Kilobit);
diff --git
a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/LoadReportNetworkLimitTest.java
b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/LoadReportNetworkLimitTest.java
index 82e5a8212fa..bec971dfa40 100644
---
a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/LoadReportNetworkLimitTest.java
+++
b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/LoadReportNetworkLimitTest.java
@@ -29,7 +29,7 @@ import org.testng.annotations.Test;
@Test(groups = "broker")
public class LoadReportNetworkLimitTest extends MockedPulsarServiceBaseTest {
- int nicCount;
+ int usableNicCount;
@Override
protected void doInitConf() throws Exception {
@@ -43,7 +43,7 @@ public class LoadReportNetworkLimitTest extends MockedPulsarServiceBaseTest {
public void setup() throws Exception {
super.internalSetup();
if (SystemUtils.IS_OS_LINUX) {
- nicCount = LinuxInfoUtils.getPhysicalNICs().size();
+ usableNicCount = LinuxInfoUtils.getUsablePhysicalNICs().size();
}
}
@@ -60,8 +60,8 @@ public class LoadReportNetworkLimitTest extends MockedPulsarServiceBaseTest {
LoadManagerReport report = admin.brokerStats().getLoadReport();
if (SystemUtils.IS_OS_LINUX) {
- assertEquals(report.getBandwidthIn().limit, nicCount * 5.4 * 1000
* 1000);
- assertEquals(report.getBandwidthOut().limit, nicCount * 5.4 * 1000
* 1000);
+ assertEquals(report.getBandwidthIn().limit, usableNicCount * 5.4 *
1000 * 1000);
+ assertEquals(report.getBandwidthOut().limit, usableNicCount * 5.4
* 1000 * 1000);
} else {
// On non-Linux system we don't report the network usage
assertEquals(report.getBandwidthIn().limit, -1.0);