[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1295980649 ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1524,243 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. + +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param volume_size_in_gb: Size in GB of the EBS root device volume of the notebook instance. +:param volume_kms_key_id: The KMS key ID for the EBS root device volume. +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param wait_for_completion: Whether or not to wait for the notebook to be InService before returning +:param create_instance_kwargs: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +:return: The ARN of the created notebook. 
+""" + +template_fields: Sequence[str] = ( +"instance_name", +"instance_type", +"role_arn", +"volume_size_in_gb", +"volume_kms_key_id", +"lifecycle_config_name", +"direct_internet_access", +"root_access", +"wait_for_completion", +"create_instance_kwargs", +) + +ui_color = "#ff7300" + +def __init__( +self, +*, +instance_name: str, +instance_type: str, +role_arn: str, +volume_size_in_gb: int | None = None, +volume_kms_key_id: str | None = None, +lifecycle_config_name: str | None = None, +direct_internet_access: str | None = None, +root_access: str | None = None, +create_instance_kwargs: dict[str, Any] = {}, +wait_for_completion: bool = True, +aws_conn_id: str = "aws_default", +**kwargs, +): +super().__init__(**kwargs) +self.instance_name = instance_name +self.instance_type = instance_type +self.role_arn = role_arn +self.volume_size_in_gb = volume_size_in_gb +self.volume_kms_key_id = volume_kms_key_id +self.lifecycle_config_name = lifecycle_config_name +self.direct_internet_access = direct_internet_access +self.root_access = root_access +self.wait_for_completion = wait_for_completion +self.aws_conn_id = aws_conn_id +self.create_instance_kwargs = create_instance_kwargs + +if self.create_instance_kwargs.get("tags") is not None: +self.create_instance_kwargs["tags"] = format_tags(self.create_instance_kwargs["tags"]) + +@cached_property +def hook(self) -> SageMakerHook: +"""Create and return SageMakerHook.""" +return SageMakerHook(aws_conn_id=self.aws_conn_id) + +def execute(self, context: Context): + +create_notebook_instance_kwargs = { +"NotebookInstanceName": self.instance_name, +"InstanceType": self.instance_type, +"RoleArn": self.role_arn, +"VolumeSizeInGB": self.volume_size_in_gb, +"KmsKeyId": self.volume_kms_key_id, +"LifecycleConfigName": self.lifecycle_config_name, +"DirectInternetAccess": self.direct_internet_access, +"RootAccess": self.root_access, +} +if len(self.create_instance_kwargs) > 0: +create_notebook_instance_kwargs.update(self.create_instance_kwargs) + 
+self.log.info("Creating SageMaker notebook %s.", self.instance_name) +response = self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs)) + +self.log.info("SageMaker notebook created: %s", response["NotebookInstanceArn"]) + +if self.wait_for_completion: +self.log.info("Waiting for SageMaker notebook %s to be in service", self.instance_name) +waiter = self.hook.conn.get_waiter("notebook_instance_in_service") +waiter.wait(NotebookInstanceName=self.instance_name) + +return
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1294992440 ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. + +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param volume_size_in_gb: Size in GB of the EBS root device volume of the notebook instance. +:param volume_kms_key_id: The KMS key ID for the EBS root device volume. +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param wait_for_completion: Whether or not to wait for the notebook to be InService before returning +:param create_instance_kwargs: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +This operator returns The ARN of the created notebook. 
+""" + +template_fields: Sequence[str] = ( +"instance_name", +"instance_type", +"role_arn", +"volume_size_in_gb", +"volume_kms_key_id", +"lifecycle_config_name", +"direct_internet_access", +"root_access", +"wait_for_completion", +"create_instance_kwargs", +) + +ui_color = "#ff7300" + +def __init__( +self, +*, +instance_name: str, +instance_type: str, +role_arn: str, +volume_size_in_gb: int | None = None, +volume_kms_key_id: str | None = None, +lifecycle_config_name: str | None = None, +direct_internet_access: str | None = None, +root_access: str | None = None, +create_instance_kwargs: dict[str, Any] = {}, +wait_for_completion: bool = True, +aws_conn_id: str = "aws_default", +**kwargs, +): +super().__init__(**kwargs) +self.instance_name = instance_name +self.instance_type = instance_type +self.role_arn = role_arn +self.volume_size_in_gb = volume_size_in_gb +self.volume_kms_key_id = volume_kms_key_id +self.lifecycle_config_name = lifecycle_config_name +self.direct_internet_access = direct_internet_access +self.root_access = root_access +self.wait_for_completion = wait_for_completion +self.aws_conn_id = aws_conn_id +self.create_instance_kwargs = create_instance_kwargs + +if "tags" in self.create_instance_kwargs and self.create_instance_kwargs["tags"] is not None: +self.create_instance_kwargs["tags"] = format_tags(self.create_instance_kwargs["tags"]) + +@cached_property +def hook(self) -> SageMakerHook: +"""Create and return SageMakerHook.""" +return SageMakerHook(aws_conn_id=self.aws_conn_id) + +def execute(self, context: Context): + +create_notebook_instance_kwargs = { +"NotebookInstanceName": self.instance_name, +"InstanceType": self.instance_type, +"RoleArn": self.role_arn, +"VolumeSizeInGB": self.volume_size_in_gb, +"KmsKeyId": self.volume_kms_key_id, +"LifecycleConfigName": self.lifecycle_config_name, +"DirectInternetAccess": self.direct_internet_access, +"RootAccess": self.root_access, +} +if len(self.create_instance_kwargs) > 0: 
+create_notebook_instance_kwargs.update(self.create_instance_kwargs) + +self.log.info("Creating SageMaker notebook %s.", self.instance_name) +response = self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs)) + +self.log.info("SageMaker notebook created: %s", response["NotebookInstanceArn"]) + +if self.wait_for_completion: +self.log.info("Waiting for SageMaker notebook %s to be in service", self.instance_name) +waiter = self.hook.conn.get_waiter("notebook_instance_in_service") +
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1294990193 ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. + +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param volume_size_in_gb: Size in GB of the EBS root device volume of the notebook instance. +:param volume_kms_key_id: The KMS key ID for the EBS root device volume. +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param wait_for_completion: Whether or not to wait for the notebook to be InService before returning +:param create_instance_kwargs: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +This operator returns The ARN of the created notebook. 
+""" + +template_fields: Sequence[str] = ( +"instance_name", +"instance_type", +"role_arn", +"volume_size_in_gb", +"volume_kms_key_id", +"lifecycle_config_name", +"direct_internet_access", +"root_access", +"wait_for_completion", +"create_instance_kwargs", +) + +ui_color = "#ff7300" + +def __init__( +self, +*, +instance_name: str, +instance_type: str, +role_arn: str, +volume_size_in_gb: int | None = None, +volume_kms_key_id: str | None = None, +lifecycle_config_name: str | None = None, +direct_internet_access: str | None = None, +root_access: str | None = None, +create_instance_kwargs: dict[str, Any] = {}, +wait_for_completion: bool = True, +aws_conn_id: str = "aws_default", +**kwargs, +): +super().__init__(**kwargs) +self.instance_name = instance_name +self.instance_type = instance_type +self.role_arn = role_arn +self.volume_size_in_gb = volume_size_in_gb +self.volume_kms_key_id = volume_kms_key_id +self.lifecycle_config_name = lifecycle_config_name +self.direct_internet_access = direct_internet_access +self.root_access = root_access +self.wait_for_completion = wait_for_completion +self.aws_conn_id = aws_conn_id +self.create_instance_kwargs = create_instance_kwargs + +if "tags" in self.create_instance_kwargs and self.create_instance_kwargs["tags"] is not None: Review Comment: nit ```suggestion if self.create_instance_kwargs.get("tags") is not None: ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1294992979 ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. + +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param volume_size_in_gb: Size in GB of the EBS root device volume of the notebook instance. +:param volume_kms_key_id: The KMS key ID for the EBS root device volume. +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param wait_for_completion: Whether or not to wait for the notebook to be InService before returning +:param create_instance_kwargs: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +This operator returns The ARN of the created notebook. 
+""" + +template_fields: Sequence[str] = ( +"instance_name", +"instance_type", +"role_arn", +"volume_size_in_gb", +"volume_kms_key_id", +"lifecycle_config_name", +"direct_internet_access", +"root_access", +"wait_for_completion", +"create_instance_kwargs", +) + +ui_color = "#ff7300" + +def __init__( +self, +*, +instance_name: str, +instance_type: str, +role_arn: str, +volume_size_in_gb: int | None = None, +volume_kms_key_id: str | None = None, +lifecycle_config_name: str | None = None, +direct_internet_access: str | None = None, +root_access: str | None = None, +create_instance_kwargs: dict[str, Any] = {}, +wait_for_completion: bool = True, +aws_conn_id: str = "aws_default", +**kwargs, +): +super().__init__(**kwargs) +self.instance_name = instance_name +self.instance_type = instance_type +self.role_arn = role_arn +self.volume_size_in_gb = volume_size_in_gb +self.volume_kms_key_id = volume_kms_key_id +self.lifecycle_config_name = lifecycle_config_name +self.direct_internet_access = direct_internet_access +self.root_access = root_access +self.wait_for_completion = wait_for_completion +self.aws_conn_id = aws_conn_id +self.create_instance_kwargs = create_instance_kwargs + +if "tags" in self.create_instance_kwargs and self.create_instance_kwargs["tags"] is not None: +self.create_instance_kwargs["tags"] = format_tags(self.create_instance_kwargs["tags"]) + +@cached_property +def hook(self) -> SageMakerHook: +"""Create and return SageMakerHook.""" +return SageMakerHook(aws_conn_id=self.aws_conn_id) + +def execute(self, context: Context): + +create_notebook_instance_kwargs = { +"NotebookInstanceName": self.instance_name, +"InstanceType": self.instance_type, +"RoleArn": self.role_arn, +"VolumeSizeInGB": self.volume_size_in_gb, +"KmsKeyId": self.volume_kms_key_id, +"LifecycleConfigName": self.lifecycle_config_name, +"DirectInternetAccess": self.direct_internet_access, +"RootAccess": self.root_access, +} +if len(self.create_instance_kwargs) > 0: 
+create_notebook_instance_kwargs.update(self.create_instance_kwargs) + +self.log.info("Creating SageMaker notebook %s.", self.instance_name) +response = self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs)) + +self.log.info("SageMaker notebook created: %s", response["NotebookInstanceArn"]) + +if self.wait_for_completion: +self.log.info("Waiting for SageMaker notebook %s to be in service", self.instance_name) +waiter = self.hook.conn.get_waiter("notebook_instance_in_service") +
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1294989565 ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. + +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param volume_size_in_gb: Size in GB of the EBS root device volume of the notebook instance. +:param volume_kms_key_id: The KMS key ID for the EBS root device volume. +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param wait_for_completion: Whether or not to wait for the notebook to be InService before returning +:param create_instance_kwargs: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +This operator returns The ARN of the created notebook. Review Comment: ```suggestion :return: The ARN of the created notebook. ``` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1294992827 ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1524,246 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. + +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param volume_size_in_gb: Size in GB of the EBS root device volume of the notebook instance. +:param volume_kms_key_id: The KMS key ID for the EBS root device volume. +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param wait_for_completion: Whether or not to wait for the notebook to be InService before returning +:param create_instance_kwargs: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +This operator returns The ARN of the created notebook. 
+""" + +template_fields: Sequence[str] = ( +"instance_name", +"instance_type", +"role_arn", +"volume_size_in_gb", +"volume_kms_key_id", +"lifecycle_config_name", +"direct_internet_access", +"root_access", +"wait_for_completion", +"create_instance_kwargs", +) + +ui_color = "#ff7300" + +def __init__( +self, +*, +instance_name: str, +instance_type: str, +role_arn: str, +volume_size_in_gb: int | None = None, +volume_kms_key_id: str | None = None, +lifecycle_config_name: str | None = None, +direct_internet_access: str | None = None, +root_access: str | None = None, +create_instance_kwargs: dict[str, Any] = {}, +wait_for_completion: bool = True, +aws_conn_id: str = "aws_default", +**kwargs, +): +super().__init__(**kwargs) +self.instance_name = instance_name +self.instance_type = instance_type +self.role_arn = role_arn +self.volume_size_in_gb = volume_size_in_gb +self.volume_kms_key_id = volume_kms_key_id +self.lifecycle_config_name = lifecycle_config_name +self.direct_internet_access = direct_internet_access +self.root_access = root_access +self.wait_for_completion = wait_for_completion +self.aws_conn_id = aws_conn_id +self.create_instance_kwargs = create_instance_kwargs + +if "tags" in self.create_instance_kwargs and self.create_instance_kwargs["tags"] is not None: +self.create_instance_kwargs["tags"] = format_tags(self.create_instance_kwargs["tags"]) + +@cached_property +def hook(self) -> SageMakerHook: +"""Create and return SageMakerHook.""" +return SageMakerHook(aws_conn_id=self.aws_conn_id) + +def execute(self, context: Context): + +create_notebook_instance_kwargs = { +"NotebookInstanceName": self.instance_name, +"InstanceType": self.instance_type, +"RoleArn": self.role_arn, +"VolumeSizeInGB": self.volume_size_in_gb, +"KmsKeyId": self.volume_kms_key_id, +"LifecycleConfigName": self.lifecycle_config_name, +"DirectInternetAccess": self.direct_internet_access, +"RootAccess": self.root_access, +} +if len(self.create_instance_kwargs) > 0: 
+create_notebook_instance_kwargs.update(self.create_instance_kwargs) + +self.log.info("Creating SageMaker notebook %s.", self.instance_name) +response = self.hook.conn.create_notebook_instance(**prune_dict(create_notebook_instance_kwargs)) + +self.log.info("SageMaker notebook created: %s", response["NotebookInstanceArn"]) + +if self.wait_for_completion: +self.log.info("Waiting for SageMaker notebook %s to be in service", self.instance_name) +waiter = self.hook.conn.get_waiter("notebook_instance_in_service") +
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1294993349 ## tests/providers/amazon/aws/hooks/test_sagemaker_notebook.py: ## Review Comment: You can remove this file. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1294686733 ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1523,259 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. + +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param volume_size_in_gb: Size in GB of the EBS root device volume of the notebook instance. +:param volume_kms_key_id: The KMS key ID for the EBS root device volume. +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param wait_for_completion: Whether or not to wait for the notebook to be InService before returning +:param create_instance_kwargs: Additional configuration options for the create call. +:param config: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +This operator returns The ARN of the created notebook. 
+""" + +template_fields: Sequence[str] = ( +"instance_name", +"instance_type", +"role_arn", +"volume_size_in_gb", +"volume_kms_key_id", +"lifecycle_config_name", +"direct_internet_access", +"root_access", +"wait_for_completion", +"create_instance_kwargs", +"config", +) + +ui_color = "#ff7300" + +def __init__( +self, +*, +instance_name: str, +instance_type: str, +role_arn: str, +volume_size_in_gb: int | None = None, +volume_kms_key_id: str | None = None, +lifecycle_config_name: str | None = None, +direct_internet_access: str | None = None, +root_access: str | None = None, +create_instance_kwargs: dict[str, Any] = {}, +wait_for_completion: bool = True, +config: dict = {}, +aws_conn_id: str = "aws_default", +**kwargs, +): +super().__init__(**kwargs) +self.instance_name = instance_name +self.instance_type = instance_type +self.role_arn = role_arn +self.volume_size_in_gb = volume_size_in_gb +self.volume_kms_key_id = volume_kms_key_id +self.lifecycle_config_name = lifecycle_config_name +self.direct_internet_access = direct_internet_access +self.root_access = root_access +self.wait_for_completion = wait_for_completion +self.config = config +self.aws_conn_id = aws_conn_id +self.create_instance_kwargs = create_instance_kwargs + +if "tags" in self.create_instance_kwargs and self.create_instance_kwargs["tags"] is not None: +self.create_instance_kwargs["tags"] = format_tags(self.create_instance_kwargs["tags"]) + +self.create_notebook_instance_kwargs = { Review Comment: Why do you need to store it as instance variable? I would move this part in `execute`, no need to save it in the instance ## airflow/providers/amazon/aws/operators/sagemaker.py: ## @@ -1523,3 +1523,259 @@ def execute(self, context: Context) -> str: arn = ans["ExperimentArn"] self.log.info("Experiment %s created successfully with ARN %s.", self.name, arn) return arn + + +class SageMakerCreateNotebookOperator(BaseOperator): +""" +Create a SageMaker notebook. 
+ +More information regarding parameters of this operator can be found here + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker/client/create_notebook_instance.html. + +.. seealso: +For more information on how to use this operator, take a look at the guide: +:ref:`howto/operator:SageMakerCreateNotebookOperator` + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role that SageMaker can assume to access +:param
[GitHub] [airflow] vincbeck commented on a diff in pull request #33219: Added Amazon SageMaker Notebook hook and operators
vincbeck commented on code in PR #33219: URL: https://github.com/apache/airflow/pull/33219#discussion_r1287491411 ## airflow/providers/amazon/aws/hooks/sagemaker_notebook.py: ## @@ -0,0 +1,153 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import Any + +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.amazon.aws.utils import trim_none_values +from airflow.providers.amazon.aws.utils.tags import format_tags + + +class SageMakerNotebookHook(AwsBaseHook): +"""Interact with Amazon SageMaker to execute notebooks. + +Provide thick wrapper around +:external+boto3:py:class:`boto3.client('sagemaker') ` + +Additional arguments (such as ``aws_conn_id``) may be specified and +are passed down to the underlying AwsBaseHook. + +.. 
seealso:: +- :class:`airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook` +""" + +def __init__(self, *args, **kwargs) -> None: +kwargs["client_type"] = "sagemaker" +super().__init__(*args, **kwargs) + +def create_instance( +self, +instance_name: str, +instance_type: str, +role_arn: str, +volume_size_in_gb: int | None = None, +volume_kms_key_id: str | None = None, +tags: dict = {}, +subnet_id: str | None = None, +security_group_ids: list = [], +lifecycle_config_name: str | None = None, +direct_internet_access: str | None = None, +accelerator_types: list = [], +default_code_repo: list = [], +additional_code_repos: list = [], +root_access: str | None = None, +platform_id: str | None = None, +imds_config: dict | None = None, +**kwargs: Any, +) -> dict: +"""Create a SageMaker notebook instance. + +:param instance_name: The name of the notebook instance. +:param instance_type: The type of instance to create. For example, 'ml.t2.medium'. +:param image_uri: The Amazon EC2 Image URI for the SageMaker image to use. +:param role_arn: The Amazon Resource Name (ARN) of the IAM role to associate with the notebook +instance. +:param volume_size_in_gb: The size of the EBS volume to attach to the notebook instance. +:param volume_kms_key_id: The KMS key ID to use when creating the notebook instance. +:param tags: A list of tags to associate with the notebook instance. +:param subnet_id: The ID of the subnet in which to launch the instance. +:param security_group_ids: A list of security groups to associate with the notebook instance. + +:param lifecycle_config_name: The name of the lifecycle configuration to associate with the notebook +instance. +:param direct_internet_access: Whether to enable direct internet access for the notebook instance. +:param accelerator_types: The list of Elastic Inference (EI) accelerator types to associate with the +notebook instance. +:param default_code_repo: The URL of the Git repository that contains the default code for a notebook +instance. 
+:param additional_code_repos: A list of URLs for Git repositories that contain custom code for a +notebook instance. +:param root_access: Whether to give the notebook instance root access to the Amazon S3 bucket. +:param platform_id: The ID of the platform. +:param imds_config: The configuration for the instance metadata service. +:param config: Additional configuration options for the create call. +:param aws_conn_id: The AWS connection ID to use. + +:return: A dict containing the information about the create notebook instance call. +""" +if tags is not None: +tags = format_tags(tags) + +create_notebook_instance_kwargs = { +"NotebookInstanceName": instance_name, +"InstanceType": instance_type, +"RoleArn": role_arn, +"Tags": tags, +"VolumeSizeInGB": volume_size_in_gb, +"KmsKeyId": volume_kms_key_id, +"SubnetId":