tvalentyn commented on a change in pull request #16929:
URL: https://github.com/apache/beam/pull/16929#discussion_r817002170



##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -180,7 +190,16 @@ def __init__(self, flags=None, **kwargs):
       flags: An iterable of command line arguments to be used. If not specified
         then sys.argv will be used as input for parsing arguments.
 
-      **kwargs: Add overrides for arguments passed in flags.
+      **kwargs: Add overrides for arguments passed in flags. For kwargs,
+                please pass the option names instead of flag names.
+                Option names: These are defined as dest in the
+                parser.add_argument(). Passing flag names like

Review comment:
       revise this as well.

##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -234,12 +262,27 @@ def from_dictionary(cls, options):
 
     Returns:
       A PipelineOptions object representing the given arguments.
+
+    Note: If a boolean flag is True in the dictionary,
+          implicitly the method assumes the boolean flag is
+          specified as a command line argument. If the
+          boolean flag is False, this method simply discards
+          them.
+    Eg: {no_auth: True} is similar to python your_file.py --no_auth
+        {no_auth: False} is similar to python your_file.py.
     """
     flags = []
     for k, v in options.items():
       if isinstance(v, bool):
         if v:
           flags.append('--%s' % k)
+        # capture boolean flags with 3 values
+        # {default=None, True, False}
+        elif k in _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST:
+          _LOGGER.warning(

Review comment:
       We don't need the warning, as the method should be supporting this 
use-case (it's a bug that it currently does not).

##########
File path: sdks/python/apache_beam/options/pipeline_options_test.py
##########
@@ -647,6 +650,60 @@ def test_dataflow_service_options(self):
     self.assertEqual(
         options.get_all_options()['dataflow_service_options'], None)
 
+  def test_options_store_false_with_different_dest(self):
+    parser = _BeamArgumentParser()
+    for cls in PipelineOptions.__subclasses__():
+      cls._add_argparse_args(parser)
+
+    actions = parser._actions.copy()
+    dest_to_options = {}
+    options_diff_dest_store_true = {}
+
+    for i in range(len(actions)):
+      options_name = actions[i].option_strings
+      dest = actions[i].dest
+
+      if isinstance(actions[i].const, bool):
+        for option_name in options_name:

Review comment:
       flag_name / flag_names

##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -55,6 +55,16 @@
 
 _LOGGER = logging.getLogger(__name__)
 
+# These options have no dest and action is store_false in the
+# argparse and default is None. When parsing these options in a dict using
+# PipelineOptions,We either ignore/discard if these options are specified.

Review comment:
       ```suggestion
   # PipelineOptions(), we either ignore/discard if these options are specified.
   ```

##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -234,12 +262,27 @@ def from_dictionary(cls, options):
 
     Returns:
       A PipelineOptions object representing the given arguments.
+
+    Note: If a boolean flag is True in the dictionary,
+          implicitly the method assumes the boolean flag is
+          specified as a command line argument. If the
+          boolean flag is False, this method simply discards
+          them.
+    Eg: {no_auth: True} is similar to python your_file.py --no_auth
+        {no_auth: False} is similar to python your_file.py.
     """
     flags = []
     for k, v in options.items():
       if isinstance(v, bool):

Review comment:
       Let's move the `Note:` here.

##########
File path: sdks/python/apache_beam/options/pipeline_options_test.py
##########
@@ -647,6 +650,60 @@ def test_dataflow_service_options(self):
     self.assertEqual(
         options.get_all_options()['dataflow_service_options'], None)
 
+  def test_options_store_false_with_different_dest(self):
+    parser = _BeamArgumentParser()
+    for cls in PipelineOptions.__subclasses__():
+      cls._add_argparse_args(parser)
+
+    actions = parser._actions.copy()
+    dest_to_options = {}
+    options_diff_dest_store_true = {}
+
+    for i in range(len(actions)):
+      options_name = actions[i].option_strings

Review comment:
       s/options_name/flag_names

##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -212,6 +231,15 @@ def __init__(self, flags=None, **kwargs):
     # Users access this dictionary store via __getattr__ / __setattr__ methods.
     self._all_options = kwargs
 
+    if self.__class__ != PipelineOptions:
+      _invalid_options = {}
+      for option_name, option_value in self._all_options.items():
+        if option_name not in self._visible_option_list():
+          _invalid_options[option_name] = option_value
+
+      if _invalid_options:
+        _LOGGER.warning("Discarding invalid overrides: %s", _invalid_options)

Review comment:
       sounds like this warning will be still printed in get_all_options() call 
at pipeline submission, so we can omit it here.

##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -353,6 +396,15 @@ def view_as(self, cls):
 
     """
     view = cls(self._flags)
+
+    _invalid_options = {}
+    for option_name, option_value in self._all_options.items():
+      if option_name not in self._visible_option_list():
+        _invalid_options[option_name] = option_value
+
+    if _invalid_options:
+      _LOGGER.warning("Discarding invalid overrides: %s", _invalid_options)

Review comment:
       We could also omit this. 

##########
File path: sdks/python/apache_beam/options/pipeline_options_test.py
##########
@@ -647,6 +650,60 @@ def test_dataflow_service_options(self):
     self.assertEqual(
         options.get_all_options()['dataflow_service_options'], None)
 
+  def test_options_store_false_with_different_dest(self):
+    parser = _BeamArgumentParser()
+    for cls in PipelineOptions.__subclasses__():
+      cls._add_argparse_args(parser)
+
+    actions = parser._actions.copy()
+    dest_to_options = {}
+    options_diff_dest_store_true = {}
+
+    for i in range(len(actions)):
+      options_name = actions[i].option_strings
+      dest = actions[i].dest

Review comment:
       consider: s/dest/option_name

##########
File path: sdks/python/apache_beam/options/pipeline_options_test.py
##########
@@ -647,6 +650,60 @@ def test_dataflow_service_options(self):
     self.assertEqual(
         options.get_all_options()['dataflow_service_options'], None)
 
+  def test_options_store_false_with_different_dest(self):
+    parser = _BeamArgumentParser()
+    for cls in PipelineOptions.__subclasses__():
+      cls._add_argparse_args(parser)
+
+    actions = parser._actions.copy()
+    dest_to_options = {}

Review comment:
       dest_to_flags

##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -234,12 +262,27 @@ def from_dictionary(cls, options):
 
     Returns:
       A PipelineOptions object representing the given arguments.
+
+    Note: If a boolean flag is True in the dictionary,
+          implicitly the method assumes the boolean flag is
+          specified as a command line argument. If the
+          boolean flag is False, this method simply discards
+          them.
+    Eg: {no_auth: True} is similar to python your_file.py --no_auth
+        {no_auth: False} is similar to python your_file.py.
     """
     flags = []
     for k, v in options.items():
       if isinstance(v, bool):
         if v:
           flags.append('--%s' % k)
+        # capture boolean flags with 3 values
+        # {default=None, True, False}
+        elif k in _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST:
+          _LOGGER.warning(
+              "Instead of %s=%s, please provide %s=%s" %
+              (k, v, _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST[k], True))
+          flags.append('--%s' % _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST[k])

Review comment:
       suggestion for better readability:
   ```
   flag_that_disables_the_option = _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST[k]
   flags.append('--%s' % flag_that_disables_the_option)
   ```

##########
File path: sdks/python/apache_beam/options/pipeline_options.py
##########
@@ -55,6 +55,16 @@
 
 _LOGGER = logging.getLogger(__name__)
 
+# These options have no dest and action is store_false in the
+# argparse and default is None. When parsing these options in a dict using
+# PipelineOptions,We either ignore/discard if these options are specified.

Review comment:
       as discussed, please revise the comment.

##########
File path: sdks/python/apache_beam/options/pipeline_options_test.py
##########
@@ -647,6 +650,60 @@ def test_dataflow_service_options(self):
     self.assertEqual(
         options.get_all_options()['dataflow_service_options'], None)
 
+  def test_options_store_false_with_different_dest(self):
+    parser = _BeamArgumentParser()
+    for cls in PipelineOptions.__subclasses__():
+      cls._add_argparse_args(parser)
+
+    actions = parser._actions.copy()
+    dest_to_options = {}
+    options_diff_dest_store_true = {}
+
+    for i in range(len(actions)):
+      options_name = actions[i].option_strings
+      dest = actions[i].dest
+
+      if isinstance(actions[i].const, bool):
+        for option_name in options_name:
+          option_name = option_name.strip(
+              '--') if '--' in option_name else option_name
+          if option_name != dest:
+            # Capture flags which has store_action=True and has a
+            # different dest. This behavior would be confusing.
+            if actions[i].const:
+              options_diff_dest_store_true[option_name] = dest
+              continue
+            # check the flags like no_use_public_ips
+            # default is None, action is {True, False}
+            if actions[i].default is None:
+              dest_to_options[dest] = option_name
+
+    assert len(options_diff_dest_store_true) == 0, (
+      _LOGGER.error("There should be no flags that have a dest "
+                    "different from flag name and action as "
+                    "store_true. It would be confusing "
+                    "to the user. Please specify the dest as the "
+                    "flag_name instead.")
+    )
+    from apache_beam.options.pipeline_options import (
+        _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST)
+
+    def get_options_not_present_in_map(d1, d2):
+      d = {}
+      for k in d1:
+        if k not in d2:
+          d[k] = d1[k]
+      return d
+
+    assert _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST == dest_to_options, (
+      "If you are adding a new boolean flag with default=None,"
+      " with dest different from flag name, please add the flag and "
+      "dest of the flag: %s to variable "
+      " _STORE_FALSE_OPTIONS_WITH_DIFFERENT_DEST in PipelineOptions.py" % (

Review comment:
       I think we can simplify this to use `self.assertDictEqual()` with 
appropriate error message.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to