[
https://issues.apache.org/jira/browse/HIVE-24884?focusedWorklogId=774350&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-774350
]
ASF GitHub Bot logged work on HIVE-24884:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 25/May/22 04:14
Start Date: 25/May/22 04:14
Worklog Time Spent: 10m
Work Description: maheshk114 commented on code in PR #3293:
URL: https://github.com/apache/hive/pull/3293#discussion_r876521344
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java:
##########
@@ -35,33 +40,64 @@
import java.util.Arrays;
import java.util.List;
+@JsonIgnoreProperties(ignoreUnknown = true)
public class DumpMetaData {
// wrapper class for reading and writing metadata about a dump
// responsible for _dumpmetadata files
public static final String DUMP_METADATA = "_dumpmetadata";
+
+ // New version of dump metadata file to store top level dumpmetadata content
in JSON format
+ public static final String DUMP_METADATA_V2 = "_dumpmetadata_v2";
private static final Logger LOG =
LoggerFactory.getLogger(DumpMetaData.class);
+ private static ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); //
Thread-safe.
+ @JsonProperty
private DumpType dumpType;
+ @JsonProperty
private Long eventFrom = null;
+ @JsonProperty
private Long eventTo = null;
+ @JsonProperty
private Path cmRoot;
+ @JsonProperty
private String payload = null;
- private ReplScope replScope = null;
-
- private boolean initialized = false;
- private final Path dumpFile;
- private final HiveConf hiveConf;
+ @JsonProperty
private Long dumpExecutionId;
+ @JsonProperty
private boolean replScopeModified = false;
+ @JsonProperty
+ private String replScopeStr = null;
+ //Ignore rest of the properties
+ @JsonIgnore
+ private ReplScope replScope = null;
+ @JsonIgnore
+ private Path dumpFile;
+ @JsonIgnore
+ private final HiveConf hiveConf;
+ @JsonIgnore
+ private boolean isTopLevel;
+ @JsonIgnore
+ private Path dumpRoot;
+ @JsonIgnore
+ private boolean initialized = false;
+
+ public DumpMetaData() {
+ //to be instantiated by JSON ObjectMapper.
+ hiveConf = null;
+ }
public DumpMetaData(Path dumpRoot, HiveConf hiveConf) {
- this.hiveConf = hiveConf;
- dumpFile = new Path(dumpRoot, DUMP_METADATA);
+ this(dumpRoot, hiveConf, false);
}
+ public DumpMetaData(Path dumpRoot, HiveConf hiveConf, boolean isTopLevel) {
+ this.dumpRoot = dumpRoot;
+ this.hiveConf = hiveConf;
+ this.isTopLevel = isTopLevel;
+ }
public DumpMetaData(Path dumpRoot, DumpType lvl, Long eventFrom, Long
eventTo, Path cmRoot,
Review Comment:
Where is this used now? For dumping to normal files?
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java:
##########
@@ -35,33 +40,64 @@
import java.util.Arrays;
import java.util.List;
+@JsonIgnoreProperties(ignoreUnknown = true)
public class DumpMetaData {
// wrapper class for reading and writing metadata about a dump
// responsible for _dumpmetadata files
public static final String DUMP_METADATA = "_dumpmetadata";
+
+ // New version of dump metadata file to store top level dumpmetadata content
in JSON format
+ public static final String DUMP_METADATA_V2 = "_dumpmetadata_v2";
private static final Logger LOG =
LoggerFactory.getLogger(DumpMetaData.class);
+ private static ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); //
Thread-safe.
+ @JsonProperty
private DumpType dumpType;
+ @JsonProperty
private Long eventFrom = null;
+ @JsonProperty
private Long eventTo = null;
+ @JsonProperty
private Path cmRoot;
+ @JsonProperty
private String payload = null;
- private ReplScope replScope = null;
-
- private boolean initialized = false;
- private final Path dumpFile;
- private final HiveConf hiveConf;
+ @JsonProperty
private Long dumpExecutionId;
+ @JsonProperty
private boolean replScopeModified = false;
+ @JsonProperty
+ private String replScopeStr = null;
+ //Ignore rest of the properties
+ @JsonIgnore
+ private ReplScope replScope = null;
+ @JsonIgnore
+ private Path dumpFile;
+ @JsonIgnore
+ private final HiveConf hiveConf;
+ @JsonIgnore
+ private boolean isTopLevel;
+ @JsonIgnore
+ private Path dumpRoot;
+ @JsonIgnore
+ private boolean initialized = false;
+
+ public DumpMetaData() {
+ //to be instantiated by JSON ObjectMapper.
+ hiveConf = null;
+ }
public DumpMetaData(Path dumpRoot, HiveConf hiveConf) {
- this.hiveConf = hiveConf;
- dumpFile = new Path(dumpRoot, DUMP_METADATA);
+ this(dumpRoot, hiveConf, false);
}
+ public DumpMetaData(Path dumpRoot, HiveConf hiveConf, boolean isTopLevel) {
Review Comment:
What does isTopLevel mean?
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/DumpMetaData.java:
##########
@@ -117,6 +153,32 @@ private void readReplScope(String line) throws IOException
{
}
private void loadDumpFromFile() throws SemanticException {
+ boolean isInJSONFormat = resolveDumpFilePathAndGetIfV2();
+ if (isInJSONFormat) {
+ loadDumpFromFileV2();
+ } else {
+ loadDumpFromFileV1();
+ }
+ }
+
+ //Returns true if dumpmetaData is in V2 Format
+ private boolean resolveDumpFilePathAndGetIfV2() throws SemanticException {
+ if (isTopLevel) {
+ dumpFile = new Path(dumpRoot, DUMP_METADATA_V2);
+ if (Utils.fileExists(dumpFile, hiveConf)) {
+ return true;
+ }
+ //Backward-compatibility: fall back to old version. Dump might be
generated by old version
+ dumpFile = new Path(dumpRoot, DUMP_METADATA);
+ LOG.info("Falling back to old version of dump meta data {}", dumpFile);
+ } else {
+ // The nested level _dumpmetadata file content is still in old format:
To save JSON parsing cost.
+ dumpFile = new Path(dumpRoot, DUMP_METADATA);
+ }
+ return false;
+ }
+
+ private void loadDumpFromFileV1() throws SemanticException {
BufferedReader br = null;
try {
Review Comment:
Are there any tests to verify this path?
Issue Time Tracking
-------------------
Worklog Id: (was: 774350)
Time Spent: 1h (was: 50m)
> Move top level dump metadata content to be in JSON format
> ---------------------------------------------------------
>
> Key: HIVE-24884
> URL: https://issues.apache.org/jira/browse/HIVE-24884
> Project: Hive
> Issue Type: Task
> Reporter: Pravin Sinha
> Assignee: Pravin Sinha
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1h
> Remaining Estimate: 0h
>
> The current content of the _dumpmetadata file is TAB-separated, which is
> not very flexible for extension. A more flexible format, such as JSON,
> would make the content easier to extend.
--
This message was sent by Atlassian Jira
(v8.20.7#820007)