[ https://issues.apache.org/jira/browse/HIVE-7934?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Xiaomeng Huang updated HIVE-7934: --------------------------------- Description: HIVE-6329 provides a framework for column-level encryption/decryption, but its implementation just uses Base64, which is not safe and has some problems: - With Base64WriteOnly, every user gets only the ciphertext from the client. - With Base64Rewriter, every user gets the plaintext from the client. I have an improvement based on HIVE-6329 that uses key management via KMS. # set up KMS and set kms-acls.xml (e.g. user1 and root have permission to get the key) {code} <property> <name>hadoop.kms.acl.GET</name> <value>user1 root</value> <description> ACL for get-key-version and get-current-key operations. </description> </property> {code} # set hive-site.xml {code} <property> <name>hadoop.security.kms.uri</name> <value>http://localhost:16000/kms</value> </property> {code} # create an encrypted table {code} -- region-aes-column.q drop table region_aes_column; create table region_aes_column (r_regionkey int, r_name string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('column.encode.columns'='r_name', 'column.encode.classname'='org.apache.hadoop.hive.serde2.aes.AESRewriter') STORED AS TEXTFILE TBLPROPERTIES("hive.encrypt.keynames"="hive.k1"); insert overwrite table region_aes_column select r_regionkey, r_name from region; {code} # query the table as different users; this is transparent to users. It is very convenient and users don't need to set anything. 
{code} [root@huang1 hive_data]# hive hive> select * from region_aes_column; OK 0 AFRICA 1 AMERICA 2 ASIA 3 EUROPE 4 MIDDLE EAST Time taken: 0.9 seconds, Fetched: 5 row(s) [root@huang1 hive_data]# su user1 [user1@huang1 hive_data]$ hive hive> select * from region_aes_column; OK 0 AFRICA 1 AMERICA 2 ASIA 3 EUROPE 4 MIDDLE EAST Time taken: 0.899 seconds, Fetched: 5 row(s) [root@huang1 hive_data]# su user2 [user2@huang1 hive_data]$ hive hive> select * from region_aes_column; OK 0 RcQycWVD 1 Rc8lam9Bxg== 2 RdEpeQ== 3 Qdcyd3ZH 4 ScskfGpHp8KIIuY= Time taken: 0.749 seconds, Fetched: 5 row(s) {code} was: HIVE-6329 provides a framework for column-level encryption/decryption, but its implementation just uses Base64, which is not safe and has some problems: - With Base64WriteOnly, every user gets only the ciphertext from the client. - With Base64Rewriter, every user gets the plaintext from the client. I have an improvement based on HIVE-6329 that uses key management via KMS. # set up KMS and set kms-acls.xml (e.g. user1 and root have permission to get the key) {code} <property> <name>hadoop.kms.acl.GET</name> <value>user1 root</value> <description> ACL for get-key-version and get-current-key operations. </description> </property> {code} # create an encrypted table {code} -- region-aes-column.q drop table region_aes_column; create table region_aes_column (r_regionkey int, r_name string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('column.encode.columns'='r_name', 'column.encode.classname'='org.apache.hadoop.hive.serde2.aes.AESRewriter') STORED AS TEXTFILE TBLPROPERTIES("hive.encrypt.keynames"="hive.k1"); insert overwrite table region_aes_column select r_regionkey, r_name from region; {code} # query the table as different users; this is transparent to users. It is very convenient and users don't need to set anything. 
{code} [root@huang1 hive_data]# hive hive> select * from region_aes_column; OK 0 AFRICA 1 AMERICA 2 ASIA 3 EUROPE 4 MIDDLE EAST Time taken: 0.9 seconds, Fetched: 5 row(s) [root@huang1 hive_data]# su user1 [user1@huang1 hive_data]$ hive hive> select * from region_aes_column; OK 0 AFRICA 1 AMERICA 2 ASIA 3 EUROPE 4 MIDDLE EAST Time taken: 0.899 seconds, Fetched: 5 row(s) [root@huang1 hive_data]# su user2 [user2@huang1 hive_data]$ hive hive> select * from region_aes_column; OK 0 RcQycWVD 1 Rc8lam9Bxg== 2 RdEpeQ== 3 Qdcyd3ZH 4 ScskfGpHp8KIIuY= Time taken: 0.749 seconds, Fetched: 5 row(s) {code} > Improve column level encryption with key management > --------------------------------------------------- > > Key: HIVE-7934 > URL: https://issues.apache.org/jira/browse/HIVE-7934 > Project: Hive > Issue Type: Improvement > Reporter: Xiaomeng Huang > Assignee: Xiaomeng Huang > Priority: Minor > > HIVE-6329 provides a framework for column-level encryption/decryption, but > its implementation just uses Base64, which is not safe and has some > problems: > - With Base64WriteOnly, every user gets only the ciphertext from the > client. > - With Base64Rewriter, every user gets the plaintext from the client. > I have an improvement based on HIVE-6329 that uses key management via KMS. > # set up KMS and set kms-acls.xml (e.g. user1 and root have permission to > get the key) > {code} > <property> > <name>hadoop.kms.acl.GET</name> > <value>user1 root</value> > <description> > ACL for get-key-version and get-current-key operations. 
> </description> > </property> > {code} > # set hive-site.xml > {code} > <property> > <name>hadoop.security.kms.uri</name> > <value>http://localhost:16000/kms</value> > </property> > {code} > # create an encrypted table > {code} > -- region-aes-column.q > drop table region_aes_column; > create table region_aes_column (r_regionkey int, r_name string) ROW FORMAT > SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' > WITH SERDEPROPERTIES ('column.encode.columns'='r_name', > 'column.encode.classname'='org.apache.hadoop.hive.serde2.aes.AESRewriter') > STORED AS TEXTFILE TBLPROPERTIES("hive.encrypt.keynames"="hive.k1"); > insert overwrite table region_aes_column > select > r_regionkey, r_name > from region; > {code} > # query the table as different users; this is transparent to users. It is > very convenient and users don't need to set anything. > {code} > [root@huang1 hive_data]# hive > hive> select * from region_aes_column; > OK > 0 AFRICA > 1 AMERICA > 2 ASIA > 3 EUROPE > 4 MIDDLE EAST > Time taken: 0.9 seconds, Fetched: 5 row(s) > [root@huang1 hive_data]# su user1 > [user1@huang1 hive_data]$ hive > hive> select * from region_aes_column; > OK > 0 AFRICA > 1 AMERICA > 2 ASIA > 3 EUROPE > 4 MIDDLE EAST > Time taken: 0.899 seconds, Fetched: 5 row(s) > [root@huang1 hive_data]# su user2 > [user2@huang1 hive_data]$ hive > hive> select * from region_aes_column; > OK > 0 RcQycWVD > 1 Rc8lam9Bxg== > 2 RdEpeQ== > 3 Qdcyd3ZH > 4 ScskfGpHp8KIIuY= > Time taken: 0.749 seconds, Fetched: 5 row(s) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)