wwlian commented on a change in pull request #4980: [AIRFLOW-3971] Add Google 
Cloud Natural Language operators
URL: https://github.com/apache/airflow/pull/4980#discussion_r270511164
 
 

 ##########
 File path: airflow/contrib/operators/gcp_natural_language_operator.py
 ##########
 @@ -0,0 +1,262 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from google.protobuf.json_format import MessageToDict
+
+from airflow.contrib.hooks.gcp_natural_language_hook import 
CloudNaturalLanguageHook
+from airflow.models import BaseOperator
+
+
+class CloudLanguageAnalyzeEntitiesOperator(BaseOperator):
+    """
+    Finds named entities (currently proper names and common nouns) in the text 
along with entity types,
+    salience, mentions for each entity, and other properties.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the 
guide:
+        :ref:`howto/operator:CloudLanguageAnalyzeEntitiesOperator`
+
+    :param document: Input document.
+        If a dict is provided, it must be of the same form as the protobuf 
message Document
+    :type document: dict or google.cloud.language_v1.types.Document
+    :param encoding_type: The encoding type used by the API to calculate 
offsets.
+    :type encoding_type: google.cloud.language_v1.types.EncodingType
+    :param retry: A retry object used to retry requests. If None is specified, 
requests will not be
+        retried.
+    :param timeout: The amount of time, in seconds, to wait for the request to 
complete. Note that if
+        retry is specified, the timeout applies to each individual attempt.
+    :type timeout: float
+    :param metadata: Additional metadata that is provided to the method.
+    :type metadata: seq[tuple[str, str]]
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud 
Platform.
+    :type gcp_conn_id: str
+    """
+
+    # [START natural_langauge_analyze_entities_template_fields]
+    template_fields = ("document", "gcp_conn_id")
+    # [END natural_langauge_analyze_entities_template_fields]
+
+    def __init__(
+        self,
+        document,
+        encoding_type=None,
+        retry=None,
+        timeout=None,
+        metadata=None,
+        gcp_conn_id="google_cloud_default",
+        *args,
+        **kwargs
+    ):
+        super(self, CloudLanguageAnalyzeEntitiesOperator).__init__(*args, 
**kwargs)
+        self.document = document
+        self.encoding_type = encoding_type
+        self.retry = retry
+        self.timeout = timeout
+        self.metadata = metadata
+        self.gcp_conn_id = gcp_conn_id
+
+    def execute(self, context):
+        hook = CloudNaturalLanguageHook(gcp_conn_id=self.gcp_conn_id)
+
+        self.log.info("Start analyzing entities")
+        response = hook.analyze_entities(
+            document=self.document, retry=self.retry, timeout=self.timeout, 
metadata=self.metadata
 
 Review comment:
   Does the natural language client library perform client-side validation of 
the document, for example by checking that a dict-type document maps correctly 
to a Document message? If not, might it make sense to use 
[GcpBodyFieldValidator](https://github.com/apache/airflow/blob/master/airflow/contrib/utils/gcp_field_validator.py)?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to