[ 
https://issues.apache.org/jira/browse/IOTDB-5132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17654312#comment-17654312
 ] 

changxue commented on IOTDB-5132:
---------------------------------

master branch 昨天傍晚的最新代码。仍然有这个问题:
44 datanode 的error 日志如下,其他日志已经上传:
{code}
2023-01-04 14:32:31,692 
[pool-25-IoTDB-ClientRPC-Processor-5$20230104_063229_00005_3.1.0] ERROR 
o.a.i.d.m.e.e.RegionWriteExecutor$WritePlanNodeExecutionVisitor:146 - Something 
wrong happened while calling consensus layer's write API.
org.apache.iotdb.consensus.exception.RatisRequestFailedException: Ratis request 
failed org.apache.ratis.server.raftlog.RaftLogIOException from Server 
2@group-000200000000: Log entry size 7388963 exceeds the max buffer limit of 
4194304
        at 
org.apache.iotdb.consensus.ratis.RatisConsensus.write(RatisConsensus.java:286)
        at 
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.executePlanNodeInConsensusLayer(RegionWriteExecutor.java:161)
        at 
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitPlan(RegionWriteExecutor.java:138)
        at 
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitPlan(RegionWriteExecutor.java:128)
        at 
org.apache.iotdb.db.mpp.plan.planner.plan.node.PlanVisitor.visitCreateAlignedTimeSeries(PlanVisitor.java:219)
        at 
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitCreateAlignedTimeSeries(RegionWriteExecutor.java:320)
        at 
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor$WritePlanNodeExecutionVisitor.visitCreateAlignedTimeSeries(RegionWriteExecutor.java:128)
        at 
org.apache.iotdb.db.mpp.plan.planner.plan.node.metedata.write.CreateAlignedTimeSeriesNode.accept(CreateAlignedTimeSeriesNode.java:191)
        at 
org.apache.iotdb.db.mpp.execution.executor.RegionWriteExecutor.execute(RegionWriteExecutor.java:86)
        at 
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatchLocally(FragmentInstanceDispatcherImpl.java:246)
        at 
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatchOneInstance(FragmentInstanceDispatcherImpl.java:142)
        at 
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatchWriteSync(FragmentInstanceDispatcherImpl.java:124)
        at 
org.apache.iotdb.db.mpp.plan.scheduler.FragmentInstanceDispatcherImpl.dispatch(FragmentInstanceDispatcherImpl.java:94)
        at 
org.apache.iotdb.db.mpp.plan.scheduler.ClusterScheduler.start(ClusterScheduler.java:112)
        at 
org.apache.iotdb.db.mpp.plan.execution.QueryExecution.schedule(QueryExecution.java:287)
        at 
org.apache.iotdb.db.mpp.plan.execution.QueryExecution.start(QueryExecution.java:211)
        at 
org.apache.iotdb.db.mpp.plan.Coordinator.execute(Coordinator.java:152)
        at 
org.apache.iotdb.db.mpp.plan.Coordinator.execute(Coordinator.java:166)
        at 
org.apache.iotdb.db.service.thrift.impl.ClientRPCServiceImpl.createAlignedTimeseries(ClientRPCServiceImpl.java:667)
        at 
org.apache.iotdb.service.rpc.thrift.IClientRPCService$Processor$createAlignedTimeseries.getResult(IClientRPCService.java:3984)
        at 
org.apache.iotdb.service.rpc.thrift.IClientRPCService$Processor$createAlignedTimeseries.getResult(IClientRPCService.java:3964)
        at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:38)
        at 
org.apache.iotdb.db.service.thrift.ProcessorWithMetrics.process(ProcessorWithMetrics.java:64)
        at 
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:248)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.ratis.protocol.exceptions.StateMachineException: 
org.apache.ratis.server.raftlog.RaftLogIOException from Server 
2@group-000200000000: Log entry size 7388963 exceeds the max buffer limit of 
4194304
        at 
org.apache.ratis.server.raftlog.RaftLogBase.appendImpl(RaftLogBase.java:184)
        at 
org.apache.ratis.server.raftlog.RaftLogBase.lambda$append$2(RaftLogBase.java:161)
        at 
org.apache.ratis.server.raftlog.RaftLogSequentialOps$Runner.runSequentially(RaftLogSequentialOps.java:69)
        at 
org.apache.ratis.server.raftlog.RaftLogBase.append(RaftLogBase.java:161)
        at 
org.apache.ratis.server.impl.ServerState.appendLog(ServerState.java:366)
        at 
org.apache.ratis.server.impl.RaftServerImpl.appendTransaction(RaftServerImpl.java:770)
        at 
org.apache.ratis.server.impl.RaftServerImpl.submitClientRequestAsync(RaftServerImpl.java:878)
        at 
org.apache.ratis.server.impl.RaftServerImpl.lambda$null$12(RaftServerImpl.java:815)
        at org.apache.ratis.util.JavaUtils.callAsUnchecked(JavaUtils.java:117)
        at 
org.apache.ratis.server.impl.RaftServerImpl.lambda$executeSubmitClientRequestAsync$13(RaftServerImpl.java:815)
        at 
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
        ... 3 common frames omitted
Caused by: org.apache.ratis.server.raftlog.RaftLogIOException: Log entry size 
7388963 exceeds the max buffer limit of 4194304
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
        at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
        at 
org.apache.ratis.util.ReflectionUtils.instantiateException(ReflectionUtils.java:259)
        at 
org.apache.ratis.client.impl.ClientProtoUtils.toStateMachineException(ClientProtoUtils.java:426)
        at 
org.apache.ratis.client.impl.ClientProtoUtils.toStateMachineException(ClientProtoUtils.java:412)
        at 
org.apache.ratis.client.impl.ClientProtoUtils.toRaftClientReply(ClientProtoUtils.java:383)
        at 
java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:602)
        at 
java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:577)
        at 
java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:474)
        at 
java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1962)
        at 
org.apache.ratis.grpc.client.GrpcClientRpc$1.onNext(GrpcClientRpc.java:151)
        at 
org.apache.ratis.grpc.client.GrpcClientRpc$1.onNext(GrpcClientRpc.java:148)
        at 
org.apache.ratis.thirdparty.io.grpc.stub.ClientCalls$StreamObserverToCallListenerAdapter.onMessage(ClientCalls.java:474)
        at 
org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener.onMessage(ForwardingClientCallListener.java:33)
        at 
org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1MessagesAvailable.runInternal(ClientCallImpl.java:661)
        at 
org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1MessagesAvailable.runInContext(ClientCallImpl.java:646)
        at 
org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
        at 
org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
        ... 3 common frames omitted
{code}


> 【Need reproduce】 Create aligned timeseries about 50W sensors with benchmark, 
> failed with 301 null 
> --------------------------------------------------------------------------------------------------
>
>                 Key: IOTDB-5132
>                 URL: https://issues.apache.org/jira/browse/IOTDB-5132
>             Project: Apache IoTDB
>          Issue Type: Bug
>          Components: Core/Schema Manager
>    Affects Versions: 1.0.0
>            Reporter: changxue
>            Assignee: Gaofei Cao
>            Priority: Major
>         Attachments: IOTDB-5132_allnodes-log.tar.gz, allnodes-log.tar.gz, 
> config.properties, nohup.out
>
>
> 【2023.1.3 Apply: There are no useful logs; this needs to be reproduced on the 
> 1.0.1 branch.】
>  
> Creating aligned timeseries for about 50W (500,000) sensors with the 
> benchmark failed with "301: null".
> environment:
> 3C3D cluster, the 1.0.0 release bin of allinone
> benchmark:
> 1.0 commit: 25c1f742
> For the configuration, see the attached config.properties; nohup.out contains 
> its full logs.
> I'm going to do performance testing on IoTDB with the benchmark. One 
> scenario is creating 50W (500,000) timeseries on 1 device.
> reproduction:
> 1. start IoTDB cluster successfully 
> 2. 4 minutes later start the iot-benchmark
> error log of benchmark:
> {code:java}
> 2022-12-06 19:37:30,280 ERROR 
> cn.edu.tsinghua.iot.benchmark.iotdb100.IoTDB:359 - Register IoTDB schema 
> failed because  
> org.apache.iotdb.rpc.StatementExecutionException: 301: null
>       at org.apache.iotdb.rpc.RpcUtils.verifySuccess(RpcUtils.java:96)
>       at 
> org.apache.iotdb.session.SessionConnection.createAlignedTimeseries(SessionConnection.java:293)
>       at 
> org.apache.iotdb.session.Session.createAlignedTimeseries(Session.java:552)
>       at 
> cn.edu.tsinghua.iot.benchmark.iotdb100.IoTDB.registerTimeseries(IoTDB.java:332)
>       at 
> cn.edu.tsinghua.iot.benchmark.iotdb100.IoTDB.registerSchema(IoTDB.java:208)
>       at 
> cn.edu.tsinghua.iot.benchmark.tsdb.DBWrapper.registerSchema(DBWrapper.java:517)
>       at 
> cn.edu.tsinghua.iot.benchmark.client.SchemaClient.run(SchemaClient.java:94)
>       at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>       at java.lang.Thread.run(Thread.java:748)
> {code}
> error log of datanode:
> {code:java}
> 2022-12-06 19:37:23,184 [grpc-default-executor-0] WARN  
> o.a.ratis.util.LogUtils:124 - 1: Failed requestVote 5->1#0
> org.apache.ratis.protocol.exceptions.GroupMismatchException: 1: 
> group-000200000000 not found.
>         at 
> org.apache.ratis.server.impl.RaftServerProxy$ImplMap.get(RaftServerProxy.java:150)
>         at 
> org.apache.ratis.server.impl.RaftServerProxy.getImplFuture(RaftServerProxy.java:351)
>         at 
> org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:360)
>         at 
> org.apache.ratis.server.impl.RaftServerProxy.getImpl(RaftServerProxy.java:355)
>         at 
> org.apache.ratis.server.impl.RaftServerProxy.requestVote(RaftServerProxy.java:618)
>         at 
> org.apache.ratis.grpc.server.GrpcServerProtocolService.requestVote(GrpcServerProtocolService.java:175)
>         at 
> org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc$MethodHandlers.invoke(RaftServerProtocolServiceGrpc.java:382)
>         at 
> org.apache.ratis.thirdparty.io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:182)
>         at 
> org.apache.ratis.thirdparty.io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)
>         at 
> org.apache.ratis.thirdparty.io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:354)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:866)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
>         at 
> org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
> {code}
> 说明:
> 1. 单独创建timeseries是没有问题的。
> 2. 猜测是列太多的问题。benchmark创建的是对齐序列。
> 3. show timeseries root.** 发现并未创建成功,1个都没有



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to