[
https://issues.apache.org/jira/browse/IOTDB-5132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17654312#comment-17654312
]
changxue commented on IOTDB-5132:
---------------------------------
master branch 昨天傍晚的最新代码。仍然有这个问题:
44 datanode 的error 日志如下,其他日志已经上传:
{code}
2023-01-04 14:32:31,692
[pool-25-IoTDB-ClientRPC-Processor-5$20230104_063229_00005_3.1.0] ERROR
o.a.i.d.m.e.e.RegionWriteExecutor$WritePlanNodeExecutionVisitor:146 - Something
wrong happened while calling consensus layer's write API.
org.apache.iotdb.consensus.exception.RatisRequestFailedException: Ratis request
failed org.apache.ratis.server.raftlog.RaftLogIOException from Server
2@group-000200000000: Log entry size 7388963 exceeds the max buffer limit of
4194304
at
org.apache.iotdb.consensus.ratis.RatisConsensus.write(RatisConsensus.java:286)
at
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.executePlanNodeInConsensusLayer(RegionWriteExecutor.java:161)
at
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitPlan(RegionWriteExecutor.java:138)
at
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitPlan(RegionWriteExecutor.java:128)
at
org.apache.iotdb.db.mpp.plan.planner.plan.node.PlanVisitor.visitCreateAlignedTimeSeries(PlanVisitor.java:219)
at
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitCreateAlignedTimeSeries(RegionWriteExecutor.java:320)
at
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitCreateAlignedTimeSeries(RegionWriteExecutor.java:128)
at
org.apache.iotdb.db.mpp.plan.planner.plan.node.metedata.write.CreateAlignedTimeSeriesNode.accept(CreateAlignedTimeSeriesNode.java:191)
at
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor.execute(RegionWriteExecutor.java:86)
at
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatchLocally(FragmentInstanceDispatcherImpl.java:246)
at
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatchOneInstance(FragmentInstanceDispatcherImpl.java:142)
at
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatchWriteSync(FragmentInstanceDispatcherImpl.java:124)
at
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatch(FragmentInstanceDispatcherImpl.java:94)
at
org.apache.iotdb.db.mpp.plan.scheduler.ClusterScheduler.start(ClusterScheduler.java:112)
at
org.apache.iotdb.db.mpp.plan.execution.QueryExecution.schedule(QueryExecution.java:287)
at
org.apache.iotdb.db.mpp.plan.execution.QueryExecution.start(QueryExecution.java:211)
at
org.apache.iotdb.db.mpp.plan.Coordinator.execute(Coordinator.java:152)
at
org.apache.iotdb.db.mpp.plan.Coordinator.execute(Coordinator.java:166)
at
org.apache.iotdb.db.service.thrift.impl.ClientRPCServiceImpl.createAlignedTimeseries(ClientRPCServiceImpl.java:667)
at
org.apache.iotdb.service.rpc.thrift.IClientRPCService$Processor$createAlignedTimeseries.getResult(IClientRPCService.java:3984)
at
org.apache.iotdb.service.rpc.thrift.IClientRPCService$Processor$createAlignedTimeseries.getResult(IClientRPCService.java:3964)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:38)
at
org.apache.iotdb.db.service.thrift.ProcessorWithMetrics.process(ProcessorWithMetrics.java:64)
at
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:248)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.ratis.protocol.exceptions.StateMachineException:
org.apache.ratis.server.raftlog.RaftLogIOException from Server
2@group-000200000000: Log entry size 7388963 exceeds the max buffer limit of
4194304
at
org.apache.ratis.server.raftlog.RaftLogBase.appendImpl(RaftLogBase.java:184)
at
org.apache.ratis.server.raftlog.RaftLogBase.lambda$append$2(RaftLogBase.java:161)
at
org.apache.ratis.server.raftlog.RaftLogSequentialOps$Runner.runSequentially(RaftLogSequentialOps.java:69)
at
org.apache.ratis.server.raftlog.RaftLogBase.append(RaftLogBase.java:161)
at
org.apache.ratis.server.impl.ServerState.appendLog(ServerState.java:366)
at
org.apache.ratis.server.impl.RaftServerImpl.appendTransaction(RaftServerImpl.java:770)
at
org.apache.ratis.server.impl.RaftServerImpl.submitClientRequestAsync(RaftServerImpl.java:878)
at
org.apache.ratis.server.impl.RaftServerImpl.lambda$null$12(RaftServerImpl.java:815)
at org.apache.ratis.util.JavaUtils.callAsUnchecked(JavaUtils.java:117)
at
org.apache.ratis.server.impl.RaftServerImpl.lambda$executeSubmitClientRequestAsync$13(RaftServerImpl.java:815)
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
... 3 common frames omitted
Caused by: org.apache.ratis.server.raftlog.RaftLogIOException: Log entry size
7388963 exceeds the max buffer limit of 4194304
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at
org.apache.ratis.util.ReflectionUtils.instantiateException(ReflectionUtils.java:259)
at
org.apache.ratis.client.impl.ClientProtoUtils.toStateMachineException(ClientProtoUtils.java:426)
at
org.apache.ratis.client.impl.ClientProtoUtils.toStateMachineException(ClientProtoUtils.java:412)
at
org.apache.ratis.client.impl.ClientProtoUtils.toRaftClientReply(ClientProtoUtils.java:383)
at
java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:602)
at
java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:577)
at
java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
at
java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1962)
at
org.apache.ratis.grpc.client.GrpcClientRpc$1.onNext(GrpcClientRpc.java:151)
at
org.apache.ratis.grpc.client.GrpcClientRpc$1.onNext(GrpcClientRpc.java:148)
at
org.apache.ratis.thirdparty.io.grpc.stub.ClientCalls$StreamObserverToCallListenerAdapter.onMessage(ClientCalls.java:474)
at
org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener.onMessage(ForwardingClientCallListener.java:33)
at
org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1MessagesAvailable.runInternal(ClientCallImpl.java:661)
at
org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1MessagesAvailable.runInContext(ClientCallImpl.java:646)
at
org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
at
org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
... 3 common frames omitted
{code}
> 【Need reproduce】 Create aligned timeseries about 50W sensors with benchmark,
> failed with 301 null
> --------------------------------------------------------------------------------------------------
>
> Key: IOTDB-5132
> URL: https://issues.apache.org/jira/browse/IOTDB-5132
> Project: Apache IoTDB
> Issue Type: Bug
> Components: Core/Schema Manager
> Affects Versions: 1.0.0
> Reporter: changxue
> Assignee: Gaofei Cao
> Priority: Major
> Attachments: IOTDB-5132_allnodes-log.tar.gz, allnodes-log.tar.gz,
> config.properties, nohup.out
>
>
> 【2023.1.3 Apply:There are no useful logs; this needs to be reproduced on the
> 1.0.1 branch.】
>
> Creating aligned timeseries for about 50W (500,000) sensors with the benchmark
> failed with "301: null".
> environment:
> 3C3D cluster, the 1.0.0 release bin of allinone
> benchmark:
> 1.0 commit: 25c1f742
> Config: see the attached config.properties; the attached nohup.out contains its full logs.
> I'm going to do performance testing on IoTDB with the benchmark. One
> scenario is to create 50W (500,000) timeseries on 1 device.
> reproduction:
> 1. start IoTDB cluster successfully
> 2. start the iot-benchmark 4 minutes later
> error log of benchmark:
> {code:java}
> 2022-12-06 19:37:30,280 ERROR
> cn.edu.tsinghua.iot.benchmark.iotdb100.IoTDB:359 - Register IoTDB schema
> failed because
> org.apache.iotdb.rpc.StatementExecutionException: 301: null
> at org.apache.iotdb.rpc.RpcUtils.verifySuccess(RpcUtils.java:96)
> at
> org.apache.iotdb.session.SessionConnection.createAlignedTimeseries(SessionConnection.java:293)
> at
> org.apache.iotdb.session.Session.createAlignedTimeseries(Session.java:552)
> at
> cn.edu.tsinghua.iot.benchmark.iotdb100.IoTDB.registerTimeseries(IoTDB.java:332)
> at
> cn.edu.tsinghua.iot.benchmark.iotdb100.IoTDB.registerSchema(IoTDB.java:208)
> at
> cn.edu.tsinghua.iot.benchmark.tsdb.DBWrapper.registerSchema(DBWrapper.java:517)
> at
> cn.edu.tsinghua.iot.benchmark.client.SchemaClient.run(SchemaClient.java:94)
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> {code}
> error log of datanode:
> {code:java}
> 2022-12-06 19:37:23,184 [grpc-default-executor-0] WARN
> o.a.ratis.util.LogUtils:124 - 1: Failed requestVote 5->1#0
> org.apache.ratis.protocol.exceptions.GroupMismatchException: 1:
> group-000200000000 not found.
> at
> org.apache.ratis.server.impl.RaftServerProxy$ImplMap.get(RaftServerProxy.java:150)
> at
> org.apache.ratis.server.impl.RaftServerProxy.getImplFuture(RaftServerProxy.java:351)
> at
> org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:360)
> at
> org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:355)
> at
> org.apache.ratis.server.impl.RaftServerProxy.requestVote(RaftServerProxy.java:618)
> at
> org.apache.ratis.grpc.server.GrpcServerProtocolService.requestVote(GrpcServerProtocolService.java:175)
> at
> org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc$MethodHandlers.invoke(RaftServerProtocolServiceGrpc.java:382)
> at
> org.apache.ratis.thirdparty.io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:182)
> at
> org.apache.ratis.thirdparty.io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)
> at
> org.apache.ratis.thirdparty.io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)
> at
> org.apache.ratis.thirdparty.io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:354)
> at
> org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:866)
> at
> org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
> at
> org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> {code}
> 说明:
> 1. 单独创建timeseries是没有问题的。
> 2. 猜测是列太多的问题。benchmark创建的是对齐序列。
> 3. show timeseries root.** 发现并未创建成功,1个都没有
--
This message was sent by Atlassian Jira
(v8.20.10#820010)