[
https://issues.apache.org/jira/browse/HIVE-23956?focusedWorklogId=465603&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-465603
]
ASF GitHub Bot logged work on HIVE-23956:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 03/Aug/20 09:38
Start Date: 03/Aug/20 09:38
Worklog Time Spent: 10m
Work Description: pvary commented on a change in pull request #1339:
URL: https://github.com/apache/hive/pull/1339#discussion_r464304647
##########
File path:
llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
##########
@@ -250,18 +255,71 @@ public void testGetOrcTailForPath() throws Exception {
Configuration jobConf = new Configuration();
Configuration daemonConf = new Configuration();
CacheTag tag = CacheTag.build("test-table");
- OrcTail uncached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf,
tag, daemonConf, cache);
+ OrcTail uncached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf,
tag, daemonConf, cache, null);
jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
- OrcTail cached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf,
tag, daemonConf, cache);
+ OrcTail cached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf,
tag, daemonConf, cache, null);
assertEquals(uncached.getSerializedTail(), cached.getSerializedTail());
assertEquals(uncached.getFileTail(), cached.getFileTail());
}
+ @Test
+ public void testGetOrcTailForPathWithFileId() throws Exception {
+ DummyMemoryManager mm = new DummyMemoryManager();
+ DummyCachePolicy cp = new DummyCachePolicy();
+ final int MAX_ALLOC = 64;
+ LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
+ BuddyAllocator alloc = new BuddyAllocator(
+ false, false, 8, MAX_ALLOC, 1, 4096, 0, null, mm, metrics, null, true);
+ MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
+
+ Path path = new Path("../data/files/alltypesorc");
+ Configuration jobConf = new Configuration();
+ Configuration daemonConf = new Configuration();
+ CacheTag tag = CacheTag.build("test-table");
+ FileSystem fs = FileSystem.get(daemonConf);
+ FileStatus fileStatus = fs.getFileStatus(path);
+ OrcTail uncached =
OrcEncodedDataReader.getOrcTailForPath(fileStatus.getPath(), jobConf, tag,
daemonConf, cache, new SyntheticFileId(fileStatus));
+ jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
+ // this should work from the cache, by recalculating the same fileId
+ OrcTail cached =
OrcEncodedDataReader.getOrcTailForPath(fileStatus.getPath(), jobConf, tag,
daemonConf, cache, null);
+ assertEquals(uncached.getSerializedTail(), cached.getSerializedTail());
+ assertEquals(uncached.getFileTail(), cached.getFileTail());
+ }
+
+ @Test
+ public void testGetOrcTailForPathWithFileIdChange() throws Exception {
+ DummyMemoryManager mm = new DummyMemoryManager();
+ DummyCachePolicy cp = new DummyCachePolicy();
+ final int MAX_ALLOC = 64;
+ LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
+ BuddyAllocator alloc = new BuddyAllocator(
+ false, false, 8, MAX_ALLOC, 1, 4096, 0, null, mm, metrics, null, true);
+ MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
+
+ Path path = new Path("../data/files/alltypesorc");
+ Configuration jobConf = new Configuration();
+ Configuration daemonConf = new Configuration();
+ CacheTag tag = CacheTag.build("test-table");
+ OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf,
cache, new SyntheticFileId(path, 100, 100));
+ jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
+ Exception ex = null;
+ try {
+ // this should miss the cache, since the fileKey changed
+ OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf,
cache, new SyntheticFileId(path, 100, 101));
+ } catch (IOException e) {
+ ex = e;
+ }
+ Assert.assertNotNull(ex);
+
Assert.assertTrue(ex.getMessage().contains(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname));
+ }
+
+
Review comment:
nit: too many newlines. If we need any fix, please remove them.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 465603)
Time Spent: 2h 40m (was: 2.5h)
> Delete delta directory file information should be pushed to execution side
> --------------------------------------------------------------------------
>
> Key: HIVE-23956
> URL: https://issues.apache.org/jira/browse/HIVE-23956
> Project: Hive
> Issue Type: Improvement
> Reporter: Peter Varga
> Assignee: Peter Varga
> Priority: Major
> Labels: pull-request-available
> Time Spent: 2h 40m
> Remaining Estimate: 0h
>
> Since HIVE-23840 LLAP cache is used to retrieve the tail of the ORC bucket
> files in the delete deltas, but to use the cache the fileId must be
> determined, so one more FileSystem call is issued for each bucket.
> This fileId is already available during compilation in the AcidState
> calculation, we should serialise this to the OrcSplit, and remove the
> unnecessary FS calls.
> Furthermore, instead of sending the SyntheticFileId directly, we should pass
> the attemptId instead of the standard path hash; this way the path and the
> SyntheticFileId can be calculated, and it will work even if the move-free
> delete operations are introduced.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)