[
https://issues.apache.org/jira/browse/HAWQ-956?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Oleksandr Diachenko updated HAWQ-956:
-------------------------------------
Description:
As of now, the COPY command is transactional for native HAWQ tables, but it's not
for external tables.
This command involves communication with the underlying HDFS layer, which isn't
under HAWQ's control.
If something happens to HDFS during COPY, the data in the table ends up being
corrupted.
STR:
{code}
# Create two external tables:
create writable external table store_t ( a text, b text, c text, d text )
LOCATION ('pxf://localhost:51200/data?Profile=HdfsTextSimple') FORMAT 'TEXT'
(DELIMITER ',');
create external table read_t ( a text, b text, c text, d text ) LOCATION
('pxf://localhost:51200/data?Profile=HdfsTextSimple') FORMAT 'TEXT' (DELIMITER
',');
{code}
# Copy big file(~ 1Gb) from local fs to store_t:
COPY store_t from '/tmp/data/1Gb.txt' DELIMITER ',';
# Restart HDFS while COPY is in progress.
# Run HDFS report; some of the files are still open for write:
hdfs fsck / -openforwrite
Picked up _JAVA_OPTIONS: -Xmx2048m -XX:MaxPermSize=512m -Djava.awt.headless=true
16/07/27 15:06:23 WARN util.NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable
Connecting to namenode via
http://0.0.0.0:50070/fsck?ugi=adiachenko&openforwrite=1&path=%2F
FSCK started by adiachenko (auth:SIMPLE) from /127.0.0.1 for path / at Wed Jul
27 15:06:24 PDT 2016
....................................................................................................
......../data/15137_0 0 bytes, 1 block(s), OPENFORWRITE: /data/15137_1 0 bytes,
1 block(s), OPENFORWRITE: /data/15137_2 0 bytes, 1 block(s), OPENFORWRITE:
/data/15137_3 0 bytes, 1 block(s), OPENFORWRITE: /data/15137_4 0 bytes, 1
block(s), OPENFORWRITE: /data/15137_5 0 bytes, 1 block(s), OPENFORWRITE:
./hbase/MasterProcWALs/state-00000000000000000010.log 0 bytes, 0 block(s),
OPENFORWRITE:
/hbase/WALs/192.168.97.183,60020,1469656307507/192.168.97.183%2C60020%2C1469656307507..meta.1469656315513.meta
83 bytes, 1 block(s), OPENFORWRITE:
/hbase/WALs/192.168.97.183,60020,1469656307507/192.168.97.183%2C60020%2C1469656307507.default.1469656310882
83 bytes, 1 block(s), OPENFORWRITE:
/hbase/WALs/192.168.97.183,60021,1469656309708/192.168.97.183%2C60021%2C1469656309708.default.1469656312207
83 bytes, 1 block(s), OPENFORWRITE:
/hbase/WALs/192.168.97.183,60022,1469656311015/192.168.97.183%2C60022%2C1469656311015.default.1469656313131
83 bytes, 1 block(s), OPENFORWRITE:
................................................................................
....................................................................................................
...
............................................................................Status:
HEALTHY
Total size: 185557141 B
Total dirs: 350
Total files: 30176
Total symlinks: 0
Total blocks (validated): 173 (avg. block size 1072584 B)
Minimally replicated blocks: 163 (94.21965 %)
Over-replicated blocks: 0 (0.0 %)
Under-replicated blocks: 0 (0.0 %)
Mis-replicated blocks: 0 (0.0 %)
Default replication factor: 3
Average block replication: 2.8265896
Corrupt blocks: 0
Missing replicas: 0 (0.0 %)
Number of data-nodes: 3
Number of racks: 1
FSCK ended at Wed Jul 27 15:06:25 PDT 2016 in 1163 milliseconds
The filesystem under path '/' is HEALTHY
was:
As for now COPY command is transactional for native HAWQ tables, but it's not
for external tables.
This command involves communication with underlying HDFS layer which isn't
under HAWQ's control.
If something happens to HDFS during COPY data in table ending up being
corrupted.
STR:
{code}
# Create two external tables:
create writable external table store_t ( a text, b text, c text, d text )
LOCATION ('pxf://localhost:51200/data?Profile=HdfsTextSimple') FORMAT 'TEXT'
(DELIMITER ',');
create external table read_t ( a text, b text, c text, d text ) LOCATION
('pxf://localhost:51200/data?Profile=HdfsTextSimple') FORMAT 'TEXT' (DELIMITER
',');
{code}
# Copy big file(~ 1Gb) from local fs to store_t:
COPY store_table from '/tmp/data/1Gb.txt' DELIMITER ',';
# Restart HDFS while COPY is in progress.
# Run HDFS report, some of files are still open for write:
hdfs fsck / -openforwrite
Picked up _JAVA_OPTIONS: -Xmx2048m -XX:MaxPermSize=512m -Djava.awt.headless=true
16/07/27 15:06:23 WARN util.NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable
Connecting to namenode via
http://0.0.0.0:50070/fsck?ugi=adiachenko&openforwrite=1&path=%2F
FSCK started by adiachenko (auth:SIMPLE) from /127.0.0.1 for path / at Wed Jul
27 15:06:24 PDT 2016
....................................................................................................
......../data/15137_0 0 bytes, 1 block(s), OPENFORWRITE: /data/15137_1 0 bytes,
1 block(s), OPENFORWRITE: /data/15137_2 0 bytes, 1 block(s), OPENFORWRITE:
/data/15137_3 0 bytes, 1 block(s), OPENFORWRITE: /data/15137_4 0 bytes, 1
block(s), OPENFORWRITE: /data/15137_5 0 bytes, 1 block(s), OPENFORWRITE:
./hbase/MasterProcWALs/state-00000000000000000010.log 0 bytes, 0 block(s),
OPENFORWRITE:
/hbase/WALs/192.168.97.183,60020,1469656307507/192.168.97.183%2C60020%2C1469656307507..meta.1469656315513.meta
83 bytes, 1 block(s), OPENFORWRITE:
/hbase/WALs/192.168.97.183,60020,1469656307507/192.168.97.183%2C60020%2C1469656307507.default.1469656310882
83 bytes, 1 block(s), OPENFORWRITE:
/hbase/WALs/192.168.97.183,60021,1469656309708/192.168.97.183%2C60021%2C1469656309708.default.1469656312207
83 bytes, 1 block(s), OPENFORWRITE:
/hbase/WALs/192.168.97.183,60022,1469656311015/192.168.97.183%2C60022%2C1469656311015.default.1469656313131
83 bytes, 1 block(s), OPENFORWRITE:
................................................................................
....................................................................................................
...
............................................................................Status:
HEALTHY
Total size: 185557141 B
Total dirs: 350
Total files: 30176
Total symlinks: 0
Total blocks (validated): 173 (avg. block size 1072584 B)
Minimally replicated blocks: 163 (94.21965 %)
Over-replicated blocks: 0 (0.0 %)
Under-replicated blocks: 0 (0.0 %)
Mis-replicated blocks: 0 (0.0 %)
Default replication factor: 3
Average block replication: 2.8265896
Corrupt blocks: 0
Missing replicas: 0 (0.0 %)
Number of data-nodes: 3
Number of racks: 1
FSCK ended at Wed Jul 27 15:06:25 PDT 2016 in 1163 milliseconds
The filesystem under path '/' is HEALTHY
> Make COPY command transactional for external tables
> ---------------------------------------------------
>
> Key: HAWQ-956
> URL: https://issues.apache.org/jira/browse/HAWQ-956
> Project: Apache HAWQ
> Issue Type: New Feature
> Components: External Tables, PXF
> Reporter: Oleksandr Diachenko
> Assignee: Goden Yao
> Fix For: backlog
>
>
> As of now, the COPY command is transactional for native HAWQ tables, but it's not
> for external tables.
> This command involves communication with the underlying HDFS layer, which isn't
> under HAWQ's control.
> If something happens to HDFS during COPY, the data in the table ends up being
> corrupted.
> STR:
> {code}
> # Create two external tables:
> create writable external table store_t ( a text, b text, c text, d text )
> LOCATION ('pxf://localhost:51200/data?Profile=HdfsTextSimple') FORMAT 'TEXT'
> (DELIMITER ',');
> create external table read_t ( a text, b text, c text, d text ) LOCATION
> ('pxf://localhost:51200/data?Profile=HdfsTextSimple') FORMAT 'TEXT'
> (DELIMITER ',');
> {code}
> # Copy big file(~ 1Gb) from local fs to store_t:
> COPY store_t from '/tmp/data/1Gb.txt' DELIMITER ',';
> # Restart HDFS while COPY is in progress.
> # Run HDFS report, some of files are still open for write:
> hdfs fsck / -openforwrite
> Picked up _JAVA_OPTIONS: -Xmx2048m -XX:MaxPermSize=512m
> -Djava.awt.headless=true
> 16/07/27 15:06:23 WARN util.NativeCodeLoader: Unable to load native-hadoop
> library for your platform... using builtin-java classes where applicable
> Connecting to namenode via
> http://0.0.0.0:50070/fsck?ugi=adiachenko&openforwrite=1&path=%2F
> FSCK started by adiachenko (auth:SIMPLE) from /127.0.0.1 for path / at Wed
> Jul 27 15:06:24 PDT 2016
> ....................................................................................................
> ......../data/15137_0 0 bytes, 1 block(s), OPENFORWRITE: /data/15137_1 0
> bytes, 1 block(s), OPENFORWRITE: /data/15137_2 0 bytes, 1 block(s),
> OPENFORWRITE: /data/15137_3 0 bytes, 1 block(s), OPENFORWRITE: /data/15137_4
> 0 bytes, 1 block(s), OPENFORWRITE: /data/15137_5 0 bytes, 1 block(s),
> OPENFORWRITE: ./hbase/MasterProcWALs/state-00000000000000000010.log 0 bytes,
> 0 block(s), OPENFORWRITE:
> /hbase/WALs/192.168.97.183,60020,1469656307507/192.168.97.183%2C60020%2C1469656307507..meta.1469656315513.meta
> 83 bytes, 1 block(s), OPENFORWRITE:
> /hbase/WALs/192.168.97.183,60020,1469656307507/192.168.97.183%2C60020%2C1469656307507.default.1469656310882
> 83 bytes, 1 block(s), OPENFORWRITE:
> /hbase/WALs/192.168.97.183,60021,1469656309708/192.168.97.183%2C60021%2C1469656309708.default.1469656312207
> 83 bytes, 1 block(s), OPENFORWRITE:
> /hbase/WALs/192.168.97.183,60022,1469656311015/192.168.97.183%2C60022%2C1469656311015.default.1469656313131
> 83 bytes, 1 block(s), OPENFORWRITE:
> ................................................................................
> ....................................................................................................
> ...
> ............................................................................Status:
> HEALTHY
> Total size: 185557141 B
> Total dirs: 350
> Total files: 30176
> Total symlinks: 0
> Total blocks (validated): 173 (avg. block size 1072584 B)
> Minimally replicated blocks: 163 (94.21965 %)
> Over-replicated blocks: 0 (0.0 %)
> Under-replicated blocks: 0 (0.0 %)
> Mis-replicated blocks: 0 (0.0 %)
> Default replication factor: 3
> Average block replication: 2.8265896
> Corrupt blocks: 0
> Missing replicas: 0 (0.0 %)
> Number of data-nodes: 3
> Number of racks: 1
> FSCK ended at Wed Jul 27 15:06:25 PDT 2016 in 1163 milliseconds
> The filesystem under path '/' is HEALTHY
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)