[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-16 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1295980649


##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1524,243 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param volume_size_in_gb: Size in GB of the EBS root device volume of the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID for the EBS root device volume.
+:param lifecycle_config_name: The name of the lifecycle configuration to 
associate with the notebook
+:param direct_internet_access: Whether to enable direct internet access 
for the notebook instance.
+:param root_access: Whether to give the notebook instance root access to 
the Amazon S3 bucket.
+:param wait_for_completion: Whether or not to wait for the notebook to be 
InService before returning
+:param create_instance_kwargs: Additional configuration options for the 
create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+:return: The ARN of the created notebook.
+"""
+
+template_fields: Sequence[str] = (
+"instance_name",
+"instance_type",
+"role_arn",
+"volume_size_in_gb",
+"volume_kms_key_id",
+"lifecycle_config_name",
+"direct_internet_access",
+"root_access",
+"wait_for_completion",
+"create_instance_kwargs",
+)
+
+ui_color = "#ff7300"
+
+def __init__(
+self,
+*,
+instance_name: str,
+instance_type: str,
+role_arn: str,
+volume_size_in_gb: int | None = None,
+volume_kms_key_id: str | None = None,
+lifecycle_config_name: str | None = None,
+direct_internet_access: str | None = None,
+root_access: str | None = None,
+create_instance_kwargs: dict[str, Any] = {},
+wait_for_completion: bool = True,
+aws_conn_id: str = "aws_default",
+**kwargs,
+):
+super().__init__(**kwargs)
+self.instance_name = instance_name
+self.instance_type = instance_type
+self.role_arn = role_arn
+self.volume_size_in_gb = volume_size_in_gb
+self.volume_kms_key_id = volume_kms_key_id
+self.lifecycle_config_name = lifecycle_config_name
+self.direct_internet_access = direct_internet_access
+self.root_access = root_access
+self.wait_for_completion = wait_for_completion
+self.aws_conn_id = aws_conn_id
+self.create_instance_kwargs = create_instance_kwargs
+
+if self.create_instance_kwargs.get("tags") is not None:
+self.create_instance_kwargs["tags"] = 
format_tags(self.create_instance_kwargs["tags"])
+
+@cached_property
+def hook(self) -> SageMakerHook:
+"""Create and return SageMakerHook."""
+return SageMakerHook(aws_conn_id=self.aws_conn_id)
+
+def execute(self, context: Context):
+
+create_notebook_instance_kwargs = {
+"NotebookInstanceName": self.instance_name,
+"InstanceType": self.instance_type,
+"RoleArn": self.role_arn,
+"VolumeSizeInGB": self.volume_size_in_gb,
+"KmsKeyId": self.volume_kms_key_id,
+"LifecycleConfigName": self.lifecycle_config_name,
+"DirectInternetAccess": self.direct_internet_access,
+"RootAccess": self.root_access,
+}
+if len(self.create_instance_kwargs) > 0:
+create_notebook_instance_kwargs.update(self.create_instance_kwargs)
+
+self.log.info("Creating SageMaker notebook %s.", self.instance_name)
+response = 
self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs))
+
+self.log.info("SageMaker notebook created: %s", 
response["NotebookInstanceArn"])
+
+if self.wait_for_completion:
+self.log.info("Waiting for SageMaker notebook %s to be in 
service", self.instance_name)
+waiter = self.hook.conn.get_waiter("notebook_instance_in_service")
+waiter.wait(NotebookInstanceName=self.instance_name)
+
+return 

[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-15 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1294992440


##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param volume_size_in_gb: Size in GB of the EBS root device volume of the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID for the EBS root device volume.
+:param lifecycle_config_name: The name of the lifecycle configuration to 
associate with the notebook
+:param direct_internet_access: Whether to enable direct internet access 
for the notebook instance.
+:param root_access: Whether to give the notebook instance root access to 
the Amazon S3 bucket.
+:param wait_for_completion: Whether or not to wait for the notebook to be 
InService before returning
+:param create_instance_kwargs: Additional configuration options for the 
create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+This operator returns The ARN of the created notebook.
+"""
+
+template_fields: Sequence[str] = (
+"instance_name",
+"instance_type",
+"role_arn",
+"volume_size_in_gb",
+"volume_kms_key_id",
+"lifecycle_config_name",
+"direct_internet_access",
+"root_access",
+"wait_for_completion",
+"create_instance_kwargs",
+)
+
+ui_color = "#ff7300"
+
+def __init__(
+self,
+*,
+instance_name: str,
+instance_type: str,
+role_arn: str,
+volume_size_in_gb: int | None = None,
+volume_kms_key_id: str | None = None,
+lifecycle_config_name: str | None = None,
+direct_internet_access: str | None = None,
+root_access: str | None = None,
+create_instance_kwargs: dict[str, Any] = {},
+wait_for_completion: bool = True,
+aws_conn_id: str = "aws_default",
+**kwargs,
+):
+super().__init__(**kwargs)
+self.instance_name = instance_name
+self.instance_type = instance_type
+self.role_arn = role_arn
+self.volume_size_in_gb = volume_size_in_gb
+self.volume_kms_key_id = volume_kms_key_id
+self.lifecycle_config_name = lifecycle_config_name
+self.direct_internet_access = direct_internet_access
+self.root_access = root_access
+self.wait_for_completion = wait_for_completion
+self.aws_conn_id = aws_conn_id
+self.create_instance_kwargs = create_instance_kwargs
+
+if "tags" in self.create_instance_kwargs and 
self.create_instance_kwargs["tags"] is not None:
+self.create_instance_kwargs["tags"] = 
format_tags(self.create_instance_kwargs["tags"])
+
+@cached_property
+def hook(self) -> SageMakerHook:
+"""Create and return SageMakerHook."""
+return SageMakerHook(aws_conn_id=self.aws_conn_id)
+
+def execute(self, context: Context):
+
+create_notebook_instance_kwargs = {
+"NotebookInstanceName": self.instance_name,
+"InstanceType": self.instance_type,
+"RoleArn": self.role_arn,
+"VolumeSizeInGB": self.volume_size_in_gb,
+"KmsKeyId": self.volume_kms_key_id,
+"LifecycleConfigName": self.lifecycle_config_name,
+"DirectInternetAccess": self.direct_internet_access,
+"RootAccess": self.root_access,
+}
+if len(self.create_instance_kwargs) > 0:
+create_notebook_instance_kwargs.update(self.create_instance_kwargs)
+
+self.log.info("Creating SageMaker notebook %s.", self.instance_name)
+response = 
self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs))
+
+self.log.info("SageMaker notebook created: %s", 
response["NotebookInstanceArn"])
+
+if self.wait_for_completion:
+self.log.info("Waiting for SageMaker notebook %s to be in 
service", self.instance_name)
+waiter = self.hook.conn.get_waiter("notebook_instance_in_service")
+

[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-15 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1294990193


##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param volume_size_in_gb: Size in GB of the EBS root device volume of the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID for the EBS root device volume.
+:param lifecycle_config_name: The name of the lifecycle configuration to 
associate with the notebook
+:param direct_internet_access: Whether to enable direct internet access 
for the notebook instance.
+:param root_access: Whether to give the notebook instance root access to 
the Amazon S3 bucket.
+:param wait_for_completion: Whether or not to wait for the notebook to be 
InService before returning
+:param create_instance_kwargs: Additional configuration options for the 
create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+This operator returns The ARN of the created notebook.
+"""
+
+template_fields: Sequence[str] = (
+"instance_name",
+"instance_type",
+"role_arn",
+"volume_size_in_gb",
+"volume_kms_key_id",
+"lifecycle_config_name",
+"direct_internet_access",
+"root_access",
+"wait_for_completion",
+"create_instance_kwargs",
+)
+
+ui_color = "#ff7300"
+
+def __init__(
+self,
+*,
+instance_name: str,
+instance_type: str,
+role_arn: str,
+volume_size_in_gb: int | None = None,
+volume_kms_key_id: str | None = None,
+lifecycle_config_name: str | None = None,
+direct_internet_access: str | None = None,
+root_access: str | None = None,
+create_instance_kwargs: dict[str, Any] = {},
+wait_for_completion: bool = True,
+aws_conn_id: str = "aws_default",
+**kwargs,
+):
+super().__init__(**kwargs)
+self.instance_name = instance_name
+self.instance_type = instance_type
+self.role_arn = role_arn
+self.volume_size_in_gb = volume_size_in_gb
+self.volume_kms_key_id = volume_kms_key_id
+self.lifecycle_config_name = lifecycle_config_name
+self.direct_internet_access = direct_internet_access
+self.root_access = root_access
+self.wait_for_completion = wait_for_completion
+self.aws_conn_id = aws_conn_id
+self.create_instance_kwargs = create_instance_kwargs
+
+if "tags" in self.create_instance_kwargs and 
self.create_instance_kwargs["tags"] is not None:

Review Comment:
   nit
   ```suggestion
   if self.create_instance_kwargs.get("tags") is not None:
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-15 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1294992979


##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param volume_size_in_gb: Size in GB of the EBS root device volume of the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID for the EBS root device volume.
+:param lifecycle_config_name: The name of the lifecycle configuration to 
associate with the notebook
+:param direct_internet_access: Whether to enable direct internet access 
for the notebook instance.
+:param root_access: Whether to give the notebook instance root access to 
the Amazon S3 bucket.
+:param wait_for_completion: Whether or not to wait for the notebook to be 
InService before returning
+:param create_instance_kwargs: Additional configuration options for the 
create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+This operator returns The ARN of the created notebook.
+"""
+
+template_fields: Sequence[str] = (
+"instance_name",
+"instance_type",
+"role_arn",
+"volume_size_in_gb",
+"volume_kms_key_id",
+"lifecycle_config_name",
+"direct_internet_access",
+"root_access",
+"wait_for_completion",
+"create_instance_kwargs",
+)
+
+ui_color = "#ff7300"
+
+def __init__(
+self,
+*,
+instance_name: str,
+instance_type: str,
+role_arn: str,
+volume_size_in_gb: int | None = None,
+volume_kms_key_id: str | None = None,
+lifecycle_config_name: str | None = None,
+direct_internet_access: str | None = None,
+root_access: str | None = None,
+create_instance_kwargs: dict[str, Any] = {},
+wait_for_completion: bool = True,
+aws_conn_id: str = "aws_default",
+**kwargs,
+):
+super().__init__(**kwargs)
+self.instance_name = instance_name
+self.instance_type = instance_type
+self.role_arn = role_arn
+self.volume_size_in_gb = volume_size_in_gb
+self.volume_kms_key_id = volume_kms_key_id
+self.lifecycle_config_name = lifecycle_config_name
+self.direct_internet_access = direct_internet_access
+self.root_access = root_access
+self.wait_for_completion = wait_for_completion
+self.aws_conn_id = aws_conn_id
+self.create_instance_kwargs = create_instance_kwargs
+
+if "tags" in self.create_instance_kwargs and 
self.create_instance_kwargs["tags"] is not None:
+self.create_instance_kwargs["tags"] = 
format_tags(self.create_instance_kwargs["tags"])
+
+@cached_property
+def hook(self) -> SageMakerHook:
+"""Create and return SageMakerHook."""
+return SageMakerHook(aws_conn_id=self.aws_conn_id)
+
+def execute(self, context: Context):
+
+create_notebook_instance_kwargs = {
+"NotebookInstanceName": self.instance_name,
+"InstanceType": self.instance_type,
+"RoleArn": self.role_arn,
+"VolumeSizeInGB": self.volume_size_in_gb,
+"KmsKeyId": self.volume_kms_key_id,
+"LifecycleConfigName": self.lifecycle_config_name,
+"DirectInternetAccess": self.direct_internet_access,
+"RootAccess": self.root_access,
+}
+if len(self.create_instance_kwargs) > 0:
+create_notebook_instance_kwargs.update(self.create_instance_kwargs)
+
+self.log.info("Creating SageMaker notebook %s.", self.instance_name)
+response = 
self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs))
+
+self.log.info("SageMaker notebook created: %s", 
response["NotebookInstanceArn"])
+
+if self.wait_for_completion:
+self.log.info("Waiting for SageMaker notebook %s to be in 
service", self.instance_name)
+waiter = self.hook.conn.get_waiter("notebook_instance_in_service")
+

[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-15 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1294989565


##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param volume_size_in_gb: Size in GB of the EBS root device volume of the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID for the EBS root device volume.
+:param lifecycle_config_name: The name of the lifecycle configuration to 
associate with the notebook
+:param direct_internet_access: Whether to enable direct internet access 
for the notebook instance.
+:param root_access: Whether to give the notebook instance root access to 
the Amazon S3 bucket.
+:param wait_for_completion: Whether or not to wait for the notebook to be 
InService before returning
+:param create_instance_kwargs: Additional configuration options for the 
create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+This operator returns The ARN of the created notebook.

Review Comment:
   ```suggestion
   :return: The ARN of the created notebook.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-15 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1294992827


##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param volume_size_in_gb: Size in GB of the EBS root device volume of the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID for the EBS root device volume.
+:param lifecycle_config_name: The name of the lifecycle configuration to 
associate with the notebook
+:param direct_internet_access: Whether to enable direct internet access 
for the notebook instance.
+:param root_access: Whether to give the notebook instance root access to 
the Amazon S3 bucket.
+:param wait_for_completion: Whether or not to wait for the notebook to be 
InService before returning
+:param create_instance_kwargs: Additional configuration options for the 
create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+This operator returns The ARN of the created notebook.
+"""
+
+template_fields: Sequence[str] = (
+"instance_name",
+"instance_type",
+"role_arn",
+"volume_size_in_gb",
+"volume_kms_key_id",
+"lifecycle_config_name",
+"direct_internet_access",
+"root_access",
+"wait_for_completion",
+"create_instance_kwargs",
+)
+
+ui_color = "#ff7300"
+
+def __init__(
+self,
+*,
+instance_name: str,
+instance_type: str,
+role_arn: str,
+volume_size_in_gb: int | None = None,
+volume_kms_key_id: str | None = None,
+lifecycle_config_name: str | None = None,
+direct_internet_access: str | None = None,
+root_access: str | None = None,
+create_instance_kwargs: dict[str, Any] = {},
+wait_for_completion: bool = True,
+aws_conn_id: str = "aws_default",
+**kwargs,
+):
+super().__init__(**kwargs)
+self.instance_name = instance_name
+self.instance_type = instance_type
+self.role_arn = role_arn
+self.volume_size_in_gb = volume_size_in_gb
+self.volume_kms_key_id = volume_kms_key_id
+self.lifecycle_config_name = lifecycle_config_name
+self.direct_internet_access = direct_internet_access
+self.root_access = root_access
+self.wait_for_completion = wait_for_completion
+self.aws_conn_id = aws_conn_id
+self.create_instance_kwargs = create_instance_kwargs
+
+if "tags" in self.create_instance_kwargs and 
self.create_instance_kwargs["tags"] is not None:
+self.create_instance_kwargs["tags"] = 
format_tags(self.create_instance_kwargs["tags"])
+
+@cached_property
+def hook(self) -> SageMakerHook:
+"""Create and return SageMakerHook."""
+return SageMakerHook(aws_conn_id=self.aws_conn_id)
+
+def execute(self, context: Context):
+
+create_notebook_instance_kwargs = {
+"NotebookInstanceName": self.instance_name,
+"InstanceType": self.instance_type,
+"RoleArn": self.role_arn,
+"VolumeSizeInGB": self.volume_size_in_gb,
+"KmsKeyId": self.volume_kms_key_id,
+"LifecycleConfigName": self.lifecycle_config_name,
+"DirectInternetAccess": self.direct_internet_access,
+"RootAccess": self.root_access,
+}
+if len(self.create_instance_kwargs) > 0:
+create_notebook_instance_kwargs.update(self.create_instance_kwargs)
+
+self.log.info("Creating SageMaker notebook %s.", self.instance_name)
+response = 
self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs))
+
+self.log.info("SageMaker notebook created: %s", 
response["NotebookInstanceArn"])
+
+if self.wait_for_completion:
+self.log.info("Waiting for SageMaker notebook %s to be in 
service", self.instance_name)
+waiter = self.hook.conn.get_waiter("notebook_instance_in_service")
+

[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-15 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1294993349


##
tests/providers/amazon/aws/hooks/test_sagemaker_notebook.py:
##


Review Comment:
   You can remove this file



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-15 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1294686733


##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1523,259 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param volume_size_in_gb: Size in GB of the EBS root device volume of the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID for the EBS root device volume.
+:param lifecycle_config_name: The name of the lifecycle configuration to 
associate with the notebook
+:param direct_internet_access: Whether to enable direct internet access 
for the notebook instance.
+:param root_access: Whether to give the notebook instance root access to 
the Amazon S3 bucket.
+:param wait_for_completion: Whether or not to wait for the notebook to be 
InService before returning
+:param create_instance_kwargs: Additional configuration options for the 
create call.
+:param config: Additional configuration options for the create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+This operator returns The ARN of the created notebook.
+"""
+
+template_fields: Sequence[str] = (
+"instance_name",
+"instance_type",
+"role_arn",
+"volume_size_in_gb",
+"volume_kms_key_id",
+"lifecycle_config_name",
+"direct_internet_access",
+"root_access",
+"wait_for_completion",
+"create_instance_kwargs",
+"config",
+)
+
+ui_color = "#ff7300"
+
+def __init__(
+self,
+*,
+instance_name: str,
+instance_type: str,
+role_arn: str,
+volume_size_in_gb: int | None = None,
+volume_kms_key_id: str | None = None,
+lifecycle_config_name: str | None = None,
+direct_internet_access: str | None = None,
+root_access: str | None = None,
+create_instance_kwargs: dict[str, Any] = {},
+wait_for_completion: bool = True,
+config: dict = {},
+aws_conn_id: str = "aws_default",
+**kwargs,
+):
+super().__init__(**kwargs)
+self.instance_name = instance_name
+self.instance_type = instance_type
+self.role_arn = role_arn
+self.volume_size_in_gb = volume_size_in_gb
+self.volume_kms_key_id = volume_kms_key_id
+self.lifecycle_config_name = lifecycle_config_name
+self.direct_internet_access = direct_internet_access
+self.root_access = root_access
+self.wait_for_completion = wait_for_completion
+self.config = config
+self.aws_conn_id = aws_conn_id
+self.create_instance_kwargs = create_instance_kwargs
+
+if "tags" in self.create_instance_kwargs and 
self.create_instance_kwargs["tags"] is not None:
+self.create_instance_kwargs["tags"] = 
format_tags(self.create_instance_kwargs["tags"])
+
+self.create_notebook_instance_kwargs = {

Review Comment:
   Why do you need to store it as an instance variable? I would move this part into 
`execute`; there is no need to save it on the instance.



##
airflow/providers/amazon/aws/operators/sagemaker.py:
##
@@ -1523,3 +1523,259 @@ def execute(self, context: Context) -> str:
 arn = ans["ExperimentArn"]
 self.log.info("Experiment %s created successfully with ARN %s.", 
self.name, arn)
 return arn
+
+
+class SageMakerCreateNotebookOperator(BaseOperator):
+"""
+Create a SageMaker notebook.
+
+More information regarding parameters of this operator can be found here
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html.
+
+.. seealso:
+For more information on how to use this operator, take a look at the 
guide:
+:ref:`howto/operator:SageMakerCreateNotebookOperator`
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role that 
SageMaker can assume to access
+:param 

[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators

2023-08-08 Thread via GitHub


vincbeck commented on code in PR #33219:
URL: https://github.com/apache/airflow/pull/33219#discussion_r1287491411


##
airflow/providers/amazon/aws/hooks/sagemaker_notebook.py:
##
@@ -0,0 +1,153 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any
+
+from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+from airflow.providers.amazon.aws.utils import trim_none_values
+from airflow.providers.amazon.aws.utils.tags import format_tags
+
+
+class SageMakerNotebookHook(AwsBaseHook):
+"""Interact with Amazon SageMaker to execute notebooks.
+
+Provide thick wrapper around
+:external+boto3:py:class:`boto3.client('sagemaker') `
+
+Additional arguments (such as ``aws_conn_id``) may be specified and
+are passed down to the underlying AwsBaseHook.
+
+.. seealso::
+- :class:`airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook`
+"""
+
+def __init__(self, *args, **kwargs) -> None:
+kwargs["client_type"] = "sagemaker"
+super().__init__(*args, **kwargs)
+
+def create_instance(
+self,
+instance_name: str,
+instance_type: str,
+role_arn: str,
+volume_size_in_gb: int | None = None,
+volume_kms_key_id: str | None = None,
+tags: dict = {},
+subnet_id: str | None = None,
+security_group_ids: list = [],
+lifecycle_config_name: str | None = None,
+direct_internet_access: str | None = None,
+accelerator_types: list = [],
+default_code_repo: list = [],
+additional_code_repos: list = [],
+root_access: str | None = None,
+platform_id: str | None = None,
+imds_config: dict | None = None,
+**kwargs: Any,
+) -> dict:
+"""Create a SageMaker notebook instance.
+
+:param instance_name: The name of the notebook instance.
+:param instance_type: The type of instance to create. For example, 
'ml.t2.medium'.
+:param image_uri: The Amazon EC2 Image URI for the SageMaker image to 
use.
+:param role_arn: The Amazon Resource Name (ARN) of the IAM role to 
associate with the notebook
+instance.
+:param volume_size_in_gb: The size of the EBS volume to attach to the 
notebook instance.
+:param volume_kms_key_id: The KMS key ID to use when creating the 
notebook instance.
+:param tags: A list of tags to associate with the notebook instance.
+:param subnet_id: The ID of the subnet in which to launch the instance.
+:param security_group_ids: A list of security groups to associate with 
the notebook instance.
+
+:param lifecycle_config_name: The name of the lifecycle configuration 
to associate with the notebook
+instance.
+:param direct_internet_access: Whether to enable direct internet 
access for the notebook instance.
+:param accelerator_types: The list of Elastic Inference (EI) 
accelerator types to associate with the
+notebook instance.
+:param default_code_repo: The URL of the Git repository that contains 
the default code for a notebook
+instance.
+:param additional_code_repos: A list of URLs for Git repositories that 
contain custom code for a
+notebook instance.
+:param root_access: Whether to give the notebook instance root access 
to the Amazon S3 bucket.
+:param platform_id: The ID of the platform.
+:param imds_config: The configuration for the instance metadata 
service.
+:param config: Additional configuration options for the create call.
+:param aws_conn_id: The AWS connection ID to use.
+
+:return: A dict containing the information about the create notebook 
instance call.
+"""
+if tags is not None:
+tags = format_tags(tags)
+
+create_notebook_instance_kwargs = {
+"NotebookInstanceName": instance_name,
+"InstanceType": instance_type,
+"RoleArn": role_arn,
+"Tags": tags,
+"VolumeSizeInGB": volume_size_in_gb,
+"KmsKeyId": volume_kms_key_id,
+"SubnetId":