Vyacheslav Tutrinov created HDDS-9807:
-----------------------------------------

             Summary: [EC] Incorrect check of available space on datanodes in 
case of allocating blocks
                 Key: HDDS-9807
                 URL: https://issues.apache.org/jira/browse/HDDS-9807
             Project: Apache Ozone
          Issue Type: Bug
          Components: EC, SCM
    Affects Versions: 1.4.0
            Reporter: Vyacheslav Tutrinov
            Assignee: Vyacheslav Tutrinov


SCM incorrectly checks whether datanodes have enough space to allocate blocks: 
it doesn't consider the committed space (the sum of the max sizes of the 
containers already created on the datanode).

Imagine the case:

1. The cluster has 10 datanodes, each with 2 GB of storage mounted at /data
./hadoop-ozone/dist/target/ozone-1.4.0-SNAPSHOT/compose/ozone/docker-compose.yaml
{code:yaml}
version: "3.8"

# reusable fragments (see 
https://docs.docker.com/compose/compose-file/#extension-fields)
x-common-config:
  &common-config
  image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION}
  volumes:
    - ../..:/opt/hadoop
  env_file:
    - docker-config

x-replication:
  &replication
  OZONE-SITE.XML_ozone.server.default.replication: 
${OZONE_REPLICATION_FACTOR:-1}

services:
  datanode1:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9001:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs1:/data
      - ../..:/opt/hadoop
  datanode2:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9002:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs2:/data
      - ../..:/opt/hadoop
  datanode3:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9003:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs3:/data
      - ../..:/opt/hadoop
  datanode4:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9004:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs4:/data
      - ../..:/opt/hadoop
  datanode5:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9005:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs5:/data
      - ../..:/opt/hadoop
  datanode6:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9006:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs6:/data
      - ../..:/opt/hadoop
  datanode7:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9007:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs7:/data
      - ../..:/opt/hadoop
  datanode8:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9008:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs8:/data
      - ../..:/opt/hadoop
  datanode9:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9009:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs9:/data
      - ../..:/opt/hadoop
  datanode10:
    <<: *common-config
    ports:
      - 19864
      - 9882
      - 9010:5005
    environment:
      <<: *replication
      OZONE_OPTS: 
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
    command: [ "ozone","datanode" ]
    volumes:
      - tmpfs10:/data
      - ../..:/opt/hadoop
  om:
    <<: *common-config
    environment:
      ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION
      OZONE_OPTS:
      <<: *replication
    ports:
      - 9874:9874
      - 9862:9862
    command: ["ozone","om"]
  scm:
    <<: *common-config
    ports:
      - 9876:9876
      - 9860:9860
    environment:
      ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION
      OZONE-SITE.XML_hdds.scm.safemode.min.datanode: 
${OZONE_SAFEMODE_MIN_DATANODES:-1}
      OZONE_OPTS:
      <<: *replication
    command: ["ozone","scm"]
  httpfs:
    <<: *common-config
    environment:
      OZONE-SITE.XML_hdds.scm.safemode.min.datanode: 
${OZONE_SAFEMODE_MIN_DATANODES:-1}
      <<: *replication
    ports:
      - 14000:14000
    command: [ "ozone","httpfs" ]
  s3g:
    <<: *common-config
    environment:
      OZONE_OPTS:
      <<: *replication
    ports:
      - 9878:9878
    command: ["ozone","s3g"]
  recon:
    <<: *common-config
    ports:
      - 9888:9888
    environment:
      OZONE_OPTS:
      <<: *replication
    command: ["ozone","recon"]
volumes:
  tmpfs1:
    driver: local
    driver_opts:
      o: "size=2g,uid=1000"
      device: tmpfs
      type: tmpfs
  tmpfs2:
    driver: local
    driver_opts:
      o: "size=2g,uid=2000"
      device: tmpfs
      type: tmpfs
  tmpfs3:
    driver: local
    driver_opts:
      o: "size=2g,uid=3000"
      device: tmpfs
      type: tmpfs
  tmpfs4:
    driver: local
    driver_opts:
      o: "size=2g,uid=4000"
      device: tmpfs
      type: tmpfs
  tmpfs5:
    driver: local
    driver_opts:
      o: "size=2g,uid=5000"
      device: tmpfs
      type: tmpfs
  tmpfs6:
    driver: local
    driver_opts:
      o: "size=2g,uid=6000"
      device: tmpfs
      type: tmpfs
  tmpfs7:
    driver: local
    driver_opts:
      o: "size=2g,uid=7000"
      device: tmpfs
      type: tmpfs
  tmpfs8:
    driver: local
    driver_opts:
      o: "size=2g,uid=8000"
      device: tmpfs
      type: tmpfs
  tmpfs9:
    driver: local
    driver_opts:
      o: "size=2g,uid=9000"
      device: tmpfs
      type: tmpfs
  tmpfs10:
    driver: local
    driver_opts:
      o: "size=2g,uid=10000"
      device: tmpfs
      type: tmpfs

{code}
./hadoop-ozone/dist/target/ozone-1.4.0-SNAPSHOT/compose/ozone/.env
{code}
...
OZONE_REPLICATION_FACTOR=3
...
{code}
./hadoop-ozone/dist/target/ozone-1.4.0-SNAPSHOT/compose/ozone/docker-config
{code}
...
OZONE-SITE.XML_ozone.scm.pipeline.creation.auto.factor.one=false
...
{code}
2. There is an EC bucket with the rs-6-3-1024k replication config
{code:bash}
ozone sh volume create data
ozone sh bucket create data/bucket1 --type EC --replication rs-6-3-1024k 
--layout LEGACY
ozone sh bucket link data/bucket1 s3v/bucket1
{code}
3. Create a 200 KiB file and put it into the bucket
{code:bash}
head -c 200KiB </dev/urandom > /tmp/test_file_200KiB
ozone sh key put s3v/bucket1/test_key_200KiB_001 /tmp/test_file_200KiB
{code}

A new EC-pipeline will be created:
{code}
#scm log
2023-11-30 08:33:26,124 [IPC Server handler 7 on default port 9863] INFO 
pipeline.WritableECContainerProvider: Created and opened new pipeline Pipeline[ 
Id: 70b771a8-0141-4447-8e0f-730b9fba2c34, Nodes: 
05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5)3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3)dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6)1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7)afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2)9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12)65fd7e45-140b-4524-b0e3-800ca5fb0724(ozone-datanode8-1.ozone_default/192.168.176.8)993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13)8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10),
 ReplicationConfig: EC{rs-6-3-1024k}, State:ALLOCATED, leaderId:, 
CreationTimestamp2023-11-30T08:33:26.080416Z[UTC]]

# ozone admin pipeline list
Pipeline[ Id: 077f1a30-0dec-4538-a66f-509583223052, Nodes: 
05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5)1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7)8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10),
 ReplicationConfig: RATIS/THREE, State:OPEN, 
leaderId:05f06265-66e3-407d-9429-a31754686468, 
CreationTimestamp2023-11-30T08:30:47.873Z[UTC]]
Pipeline[ Id: cda08d91-afee-4d31-ad16-02ea3313e502, Nodes: 
afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2)65fd7e45-140b-4524-b0e3-800ca5fb0724(ozone-datanode8-1.ozone_default/192.168.176.8)993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13),
 ReplicationConfig: RATIS/THREE, State:OPEN, 
leaderId:afd41e81-1ead-4c5a-b087-8f1bb69e2574, 
CreationTimestamp2023-11-30T08:30:47.508Z[UTC]]
Pipeline[ Id: 70b771a8-0141-4447-8e0f-730b9fba2c34, Nodes: 
05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5)3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3)dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6)1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7)afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2)9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12)65fd7e45-140b-4524-b0e3-800ca5fb0724(ozone-datanode8-1.ozone_default/192.168.176.8)993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13)8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10),
 ReplicationConfig: EC{rs-6-3-1024k}, State:OPEN, leaderId:, 
CreationTimestamp2023-11-30T08:33:26.080Z[UTC]]
Pipeline[ Id: d46e8c43-ed23-460a-8200-bb4af0599cae, Nodes: 
9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12)e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15)3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3),
 ReplicationConfig: RATIS/THREE, State:OPEN, 
leaderId:3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa, 
CreationTimestamp2023-11-30T08:30:48.503Z[UTC]]
{code}

Datanode usage info:
{code}
Usage Information (1 Datanodes)

UUID         : 8614d173-4001-46d4-a4e2-1a30339b8585 
IP Address   : 192.168.176.10 
Hostname     : ozone-datanode1-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179290112 B (170.98 MB) 
Total Used % : 8.35% 
Ozone Used   : 204800 B (200 KB) 
Ozone Used % : 0.01% 
Remaining    : 1968193536 B (1.83 GB) 
Remaining %  : 91.65% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : 993705b9-1599-4901-a629-56fbc4c29971 
IP Address   : 192.168.176.13 
Hostname     : ozone-datanode2-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179290112 B (170.98 MB) 
Total Used % : 8.35% 
Ozone Used   : 204800 B (200 KB) 
Ozone Used % : 0.01% 
Remaining    : 1968193536 B (1.83 GB) 
Remaining %  : 91.65% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : 9a144484-a05a-42e4-813e-4aaccf390ea8 
IP Address   : 192.168.176.12 
Hostname     : ozone-datanode9-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179085312 B (170.79 MB) 
Total Used % : 8.34% 
Ozone Used   : 0 B (0 B) 
Ozone Used % : 0.00% 
Remaining    : 1968398336 B (1.83 GB) 
Remaining %  : 91.66% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : afd41e81-1ead-4c5a-b087-8f1bb69e2574 
IP Address   : 192.168.176.2 
Hostname     : ozone-datanode6-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179085312 B (170.79 MB) 
Total Used % : 8.34% 
Ozone Used   : 0 B (0 B) 
Ozone Used % : 0.00% 
Remaining    : 1968398336 B (1.83 GB) 
Remaining %  : 91.66% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : dbb2a07e-5b7d-4aef-a7cc-aed3134563ae 
IP Address   : 192.168.176.6 
Hostname     : ozone-datanode10-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 174878720 B (166.78 MB) 
Total Used % : 8.14% 
Ozone Used   : 0 B (0 B) 
Ozone Used % : 0.00% 
Remaining    : 1972604928 B (1.84 GB) 
Remaining %  : 91.86% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : e68158dc-6f86-4304-b78d-86c4fa93cd7d 
IP Address   : 192.168.176.15 
Hostname     : ozone-datanode7-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 31440896 B (29.98 MB) 
Total Used % : 1.46% 
Ozone Used   : 0 B (0 B) 
Ozone Used % : 0.00% 
Remaining    : 2116042752 B (1.97 GB) 
Remaining %  : 98.54% 
Container(s) : 0 

Usage Information (1 Datanodes)

UUID         : 05f06265-66e3-407d-9429-a31754686468 
IP Address   : 192.168.176.5 
Hostname     : ozone-datanode4-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179290112 B (170.98 MB) 
Total Used % : 8.35% 
Ozone Used   : 204800 B (200 KB) 
Ozone Used % : 0.01% 
Remaining    : 1968193536 B (1.83 GB) 
Remaining %  : 91.65% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : 1f6131be-4cec-465d-a5cd-cf7b87824b7f 
IP Address   : 192.168.176.7 
Hostname     : ozone-datanode3-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179085312 B (170.79 MB) 
Total Used % : 8.34% 
Ozone Used   : 0 B (0 B) 
Ozone Used % : 0.00% 
Remaining    : 1968398336 B (1.83 GB) 
Remaining %  : 91.66% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : 3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa 
IP Address   : 192.168.176.3 
Hostname     : ozone-datanode5-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179085312 B (170.79 MB) 
Total Used % : 8.34% 
Ozone Used   : 0 B (0 B) 
Ozone Used % : 0.00% 
Remaining    : 1968398336 B (1.83 GB) 
Remaining %  : 91.66% 
Container(s) : 1 

Usage Information (1 Datanodes)

UUID         : 65fd7e45-140b-4524-b0e3-800ca5fb0724 
IP Address   : 192.168.176.8 
Hostname     : ozone-datanode8-1.ozone_default 
Capacity     : 2147483648 B (2 GB) 
Total Used   : 179290112 B (170.98 MB) 
Total Used % : 8.35% 
Ozone Used   : 204800 B (200 KB) 
Ozone Used % : 0.01% 
Remaining    : 1968193536 B (1.83 GB) 
Remaining %  : 91.65% 
Container(s) : 1 

{code}

4. Now let's create a 100 MiB file and put it into the same bucket
{code:bash}
head -c 100MiB </dev/urandom > /tmp/test_file_100MiB
ozone sh key put s3v/bucket1/test_key_100MiB_001 /tmp/test_file_100MiB
{code}
The request will fail with the following error on the client side:
{code}
INTERNAL_ERROR No enough datanodes to choose. TotalNodes = 10 AvailableNodes = 
0 RequiredNodes = 9 ExcludedNodes = 10 UsedNodes = 0
{code}
The SCM creates EC-pipelines up to the max pipeline count:
{code}
ozone-scm-1         | 2023-11-30 08:40:04,485 [IPC Server handler 20 on default 
port 9863] INFO algorithms.SCMContainerPlacementRackScatter: Chosen nodes: 
[1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7),
 
993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13),
 
9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12),
 
3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3),
 
8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10),
 
e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15),
 
65fd7e45-140b-4524-b0e3-800ca5fb0724(ozone-datanode8-1.ozone_default/192.168.176.8),
 
afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2),
 
dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6)].
 isPolicySatisfied: true.
ozone-scm-1         | 2023-11-30 08:40:04,502 [IPC Server handler 20 on default 
port 9863] INFO pipeline.WritableECContainerProvider: Created and opened new 
pipeline Pipeline[ Id: 42d76b70-84f5-42a1-980a-e3fc3445edb6, Nodes: 
1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7)993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13)9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12)3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3)8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10)e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15)65fd7e45-140b-4524-b0e3-800ca5fb0724(ozone-datanode8-1.ozone_default/192.168.176.8)afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2)dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6),
 ReplicationConfig: EC{rs-6-3-1024k}, State:ALLOCATED, leaderId:, 
CreationTimestamp2023-11-30T08:40:04.487343Z[UTC]]
ozone-scm-1         | 2023-11-30 08:40:04,503 [IPC Server handler 20 on default 
port 9863] INFO algorithms.SCMContainerPlacementRackScatter: Chosen nodes: 
[afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2),
 
1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7),
 
8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10),
 
3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3),
 
dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6),
 
05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5),
 
e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15),
 
993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13),
 
9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12)].
 isPolicySatisfied: true.
ozone-scm-1         | 2023-11-30 08:40:04,510 [IPC Server handler 20 on default 
port 9863] INFO pipeline.WritableECContainerProvider: Created and opened new 
pipeline Pipeline[ Id: 498dfea3-17ee-4600-a3b9-94727c1cd729, Nodes: 
afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2)1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7)8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10)3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3)dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6)05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5)e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15)993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13)9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12),
 ReplicationConfig: EC{rs-6-3-1024k}, State:ALLOCATED, leaderId:, 
CreationTimestamp2023-11-30T08:40:04.503388Z[UTC]]
ozone-scm-1         | 2023-11-30 08:40:04,511 [IPC Server handler 20 on default 
port 9863] INFO algorithms.SCMContainerPlacementRackScatter: Chosen nodes: 
[8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10),
 
993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13),
 
05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5),
 
9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12),
 
65fd7e45-140b-4524-b0e3-800ca5fb0724(ozone-datanode8-1.ozone_default/192.168.176.8),
 
afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2),
 
e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15),
 
dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6),
 
3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3)].
 isPolicySatisfied: true.
ozone-scm-1         | 2023-11-30 08:40:04,518 [IPC Server handler 20 on default 
port 9863] INFO pipeline.WritableECContainerProvider: Created and opened new 
pipeline Pipeline[ Id: 93539a72-b48d-4a22-8d0d-bac58d217e42, Nodes: 
8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10)993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13)05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5)9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12)65fd7e45-140b-4524-b0e3-800ca5fb0724(ozone-datanode8-1.ozone_default/192.168.176.8)afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2)e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15)dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6)3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3),
 ReplicationConfig: EC{rs-6-3-1024k}, State:ALLOCATED, leaderId:, 
CreationTimestamp2023-11-30T08:40:04.511440Z[UTC]]
ozone-scm-1         | 2023-11-30 08:40:04,518 [IPC Server handler 20 on default 
port 9863] INFO algorithms.SCMContainerPlacementRackScatter: Chosen nodes: 
[1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7),
 
afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2),
 
dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6),
 
9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12),
 
993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13),
 
8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10),
 
05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5),
 
3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3),
 
e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15)].
 isPolicySatisfied: true.
ozone-scm-1         | 2023-11-30 08:40:04,529 [IPC Server handler 20 on default 
port 9863] INFO pipeline.WritableECContainerProvider: Created and opened new 
pipeline Pipeline[ Id: 8f8cce33-8631-4e42-b3ac-34f8708be23a, Nodes: 
1f6131be-4cec-465d-a5cd-cf7b87824b7f(ozone-datanode3-1.ozone_default/192.168.176.7)afd41e81-1ead-4c5a-b087-8f1bb69e2574(ozone-datanode6-1.ozone_default/192.168.176.2)dbb2a07e-5b7d-4aef-a7cc-aed3134563ae(ozone-datanode10-1.ozone_default/192.168.176.6)9a144484-a05a-42e4-813e-4aaccf390ea8(ozone-datanode9-1.ozone_default/192.168.176.12)993705b9-1599-4901-a629-56fbc4c29971(ozone-datanode2-1.ozone_default/192.168.176.13)8614d173-4001-46d4-a4e2-1a30339b8585(ozone-datanode1-1.ozone_default/192.168.176.10)05f06265-66e3-407d-9429-a31754686468(ozone-datanode4-1.ozone_default/192.168.176.5)3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa(ozone-datanode5-1.ozone_default/192.168.176.3)e68158dc-6f86-4304-b78d-86c4fa93cd7d(ozone-datanode7-1.ozone_default/192.168.176.15),
 ReplicationConfig: EC{rs-6-3-1024k}, State:ALLOCATED, leaderId:, 
CreationTimestamp2023-11-30T08:40:04.518973Z[UTC]]
{code}
But the datanodes reserved for these pipelines cannot create new containers:
{code}
ozone-datanode8-1   | 2023-11-30 08:40:06,062 
[65fd7e45-140b-4524-b0e3-800ca5fb0724-ChunkReader-6] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 894656512 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1968193536, committed: 1073537024}
ozone-datanode1-1   | 2023-11-30 08:40:06,063 
[8614d173-4001-46d4-a4e2-1a30339b8585-ChunkReader-5] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 894656512 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1968193536, committed: 1073537024}
ozone-datanode9-1   | 2023-11-30 08:40:06,070 
[9a144484-a05a-42e4-813e-4aaccf390ea8-ChunkReader-4] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 894656512 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1968398336, committed: 1073741824}
ozone-datanode2-1   | 2023-11-30 08:40:06,078 
[993705b9-1599-4901-a629-56fbc4c29971-ChunkReader-6] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 894656512 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1968193536, committed: 1073537024}
ozone-datanode3-1   | 2023-11-30 08:40:06,093 
[1f6131be-4cec-465d-a5cd-cf7b87824b7f-ChunkReader-4] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 894656512 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1968398336, committed: 1073741824}
ozone-datanode5-1   | 2023-11-30 08:40:06,102 
[3c4549f6-e3b9-44a5-8e8b-5c1078ddaffa-ChunkReader-4] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 894656512 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1968398336, committed: 1073741824}
ozone-datanode6-1   | 2023-11-30 08:40:06,136 
[afd41e81-1ead-4c5a-b087-8f1bb69e2574-ChunkReader-3] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 894656512 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1968398336, committed: 1073741824}
ozone-datanode10-1  | 2023-11-30 08:40:06,150 
[dbb2a07e-5b7d-4aef-a7cc-aed3134563ae-ChunkReader-4] INFO 
volume.CapacityVolumeChoosingPolicy: No volumes have enough space for a new 
container.  Most available space: 898863104 bytes; required space: 1073741824, 
volumes: {/data/hdds/hdds=free: 1972604928, committed: 1073741824}
ozone-datanode8-1   | 2023-11-30 08:40:06,239 
[65fd7e45-140b-4524-b0e3-800ca5fb0724-ChunkReader-6] WARN 
keyvalue.KeyValueHandler: Operation: CreateContainer , Trace ID:  , Message: 
Container creation failed, due to disk out of space , Result: DISK_OUT_OF_SPACE 
, StorageContainerException Occurred.
ozone-datanode8-1   | 
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException: 
Container creation failed, due to disk out of space
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer.create(KeyValueContainer.java:162)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handleCreateContainer(KeyValueHandler.java:367)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.dispatchRequest(KeyValueHandler.java:239)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler.handle(KeyValueHandler.java:222)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.createContainer(HddsDispatcher.java:469)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(HddsDispatcher.java:275)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.lambda$dispatch$0(HddsDispatcher.java:179)
ozone-datanode8-1   |   at 
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:89)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(HddsDispatcher.java:178)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.transport.server.GrpcXceiverService$1.onNext(GrpcXceiverService.java:57)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.transport.server.GrpcXceiverService$1.onNext(GrpcXceiverService.java:50)
ozone-datanode8-1   |   at 
org.apache.ratis.thirdparty.io.grpc.stub.ServerCalls$StreamingServerCallHandler$StreamingServerCallListener.onMessage(ServerCalls.java:262)
ozone-datanode8-1   |   at 
org.apache.ratis.thirdparty.io.grpc.ForwardingServerCallListener.onMessage(ForwardingServerCallListener.java:33)
ozone-datanode8-1   |   at 
org.apache.hadoop.hdds.tracing.GrpcServerInterceptor$1.onMessage(GrpcServerInterceptor.java:49)
ozone-datanode8-1   |   at 
org.apache.ratis.thirdparty.io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.messagesAvailableInternal(ServerCallImpl.java:329)
ozone-datanode8-1   |   at 
org.apache.ratis.thirdparty.io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.messagesAvailable(ServerCallImpl.java:314)
ozone-datanode8-1   |   at 
org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1MessagesAvailable.runInContext(ServerImpl.java:833)
ozone-datanode8-1   |   at 
org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
ozone-datanode8-1   |   at 
org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
ozone-datanode8-1   |   at 
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
ozone-datanode8-1   |   at 
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
ozone-datanode8-1   |   at java.base/java.lang.Thread.run(Thread.java:829)
ozone-datanode8-1   | Caused by: 
org.apache.hadoop.util.DiskChecker$DiskOutOfSpaceException: No volumes have 
enough space for a new container.  Most available space: 894656512 bytes
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.volume.VolumeChoosingUtil.throwDiskOutOfSpace(VolumeChoosingUtil.java:38)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.common.volume.CapacityVolumeChoosingPolicy.chooseVolume(CapacityVolumeChoosingPolicy.java:68)
ozone-datanode8-1   |   at 
org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer.create(KeyValueContainer.java:160)
ozone-datanode8-1   |   ... 21 more
{code}
This happens because the SCM and the datanodes check volume availability in 
different ways:

SCM 
(org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementRackScatter#chooseNode(java.lang.String,
 java.util.List<org.apache.hadoop.hdds.scm.net.Node>, long, long) -> 
org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy#isValidNode -> 
org.apache.hadoop.hdds.scm.SCMCommonPlacementPolicy#hasEnoughSpace)
{code:java}
if (dataSizeRequired > 0) {
      for (StorageReportProto reportProto : datanodeInfo.getStorageReports()) {
        if (reportProto.getRemaining() > dataSizeRequired) {
          enoughForData = true;
          break;
        }
      }
    } else {
      enoughForData = true;
    }
{code}

Datanode 
(org.apache.hadoop.ozone.container.common.volume.AvailableSpaceFilter#test)
{code:java}
public boolean test(HddsVolume vol) {
    long volumeCapacity = vol.getCapacity();
    long free = vol.getAvailable();
    long committed = vol.getCommittedBytes();
    long available = free - committed;
    long volumeFreeSpace =
        VolumeUsage.getMinVolumeFreeSpace(vol.getConf(), volumeCapacity);
    boolean hasEnoughSpace =
        available > Math.max(requiredSpace, volumeFreeSpace);

    mostAvailableSpace = Math.max(available, mostAvailableSpace);

    if (!hasEnoughSpace) {
      fullVolumes.put(vol, new AvailableSpace(free, committed));
    }

    return hasEnoughSpace;
  }
{code}

The SCM doesn't take the committed space into account and therefore assumes 
that the datanode can accept new containers, but it cannot.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org
For additional commands, e-mail: issues-h...@ozone.apache.org

Reply via email to