lostluck commented on code in PR #24497:
URL: https://github.com/apache/beam/pull/24497#discussion_r1041520488


##########
sdks/go/pkg/beam/core/runtime/xlangx/expand.go:
##########
@@ -111,12 +112,29 @@ func expand(
                ext.ExpansionAddr = config
        }
 
+       // The external transforms that needs to specify the output coder
+       // in expansion request sends tagged input as xlang.SetOutputCoder.
+       var set bool
+       for tag := range edge.External.InputsMap {

Review Comment:
   Why is the tag on the InputsMap, not the outputs?



##########
sdks/go/pkg/beam/core/runtime/graphx/translate.go:
##########
@@ -687,6 +688,20 @@ func (m *marshaller) expandCrossLanguage(namedEdge 
NamedEdge) (string, error) {
                EnvironmentId: m.addDefaultEnv(),
        }
 
+       // Add the coders for output in the marshaller even if expanded is nil
+       // for output coder field in expansion request.
+       // We need this specifically for Python External Transforms.
+       names := strings.Split(spec.Urn, ":")
+       if len(names) > 2 && names[2] == "python" {

Review Comment:
   Do we need this section locked to Python now that we have the explicit 
"SetOutputCoder" hack below? I'd rather not lock it to Python if a Java 
transform ends up needing it.



##########
sdks/go/pkg/beam/transforms/xlang/inference/inference.go:
##########
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package inference has the cross language implementation of RunInference API 
implemented in Python SDK.
+// An exapnsion service for python external transforms can be started by 
running
+//
+//     $ python -m apache_beam.runners.portability.expansion_service_main -p 
$PORT_FOR_EXPANSION_SERVICE
+package inference
+
+import (
+       "reflect"
+
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/xlangx"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/xlang"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/xlang/python"
+)
+
+func init() {
+       beam.RegisterType(reflect.TypeOf((*sklearnConfig)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*argsStruct)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*sklearnKwargs)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*PredictionResult)(nil)).Elem())
+}
+
+var outputT = reflect.TypeOf((*PredictionResult)(nil)).Elem()
+
+// PredictionResult represents the result of a prediction obtained from 
Python's RunInference API.
+type PredictionResult struct {
+       Example   []int64 `beam:"example"`
+       Inference int32   `beam:"inference"`
+}
+
+type sklearnConfig struct {
+       kwargs        sklearnKwargs
+       args          argsStruct
+       expansionAddr string
+}
+
+type sklearnConfigOption func(*sklearnConfig)
+
+// WithArgs set arguments for the sklearn inference transform parameters
+func WithArgs(args []string) sklearnConfigOption {
+       return func(c *sklearnConfig) {
+               c.args.args = append(c.args.args, args...)
+       }
+}
+
+// WithExpansionAddr provides URL for Python expansion service.
+func WithExpansionAddr(expansionAddr string) sklearnConfigOption {

Review Comment:
   We may need a different tactic here, since WithExpansionAddr is general, but 
this is attached to an sklearn-specific struct. 
   
   We are either going to need general args handling that we translate to a 
specific struct afterwards, or need to do something with interfaces to be able 
to use the same options for each model type call.



##########
sdks/go/pkg/beam/transforms/xlang/inference/inference.go:
##########
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package inference has the cross language implementation of RunInference API 
implemented in Python SDK.
+// An exapnsion service for python external transforms can be started by 
running
+//
+//     $ python -m apache_beam.runners.portability.expansion_service_main -p 
$PORT_FOR_EXPANSION_SERVICE
+package inference
+
+import (
+       "reflect"
+
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/xlangx"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/xlang"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/xlang/python"
+)
+
+func init() {
+       beam.RegisterType(reflect.TypeOf((*sklearnConfig)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*argsStruct)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*sklearnKwargs)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*PredictionResult)(nil)).Elem())
+}
+
+var outputT = reflect.TypeOf((*PredictionResult)(nil)).Elem()
+
+// PredictionResult represents the result of a prediction obtained from 
Python's RunInference API.
+type PredictionResult struct {
+       Example   []int64 `beam:"example"`
+       Inference int32   `beam:"inference"`
+}
+
+type sklearnConfig struct {
+       kwargs        sklearnKwargs
+       args          argsStruct
+       expansionAddr string
+}
+
+type sklearnConfigOption func(*sklearnConfig)
+
+// WithArgs set arguments for the sklearn inference transform parameters
+func WithArgs(args []string) sklearnConfigOption {
+       return func(c *sklearnConfig) {
+               c.args.args = append(c.args.args, args...)
+       }
+}
+
+// WithExpansionAddr provides URL for Python expansion service.
+func WithExpansionAddr(expansionAddr string) sklearnConfigOption {
+       return func(c *sklearnConfig) {
+               c.expansionAddr = expansionAddr
+       }
+}
+
+type argsStruct struct {
+       args []string
+}
+
+// sklearnKwargs defines acceptable keyword args for Sklearn Model Handler.
+type sklearnKwargs struct {
+       // ModelHandlerProvider defines the model handler to be used.
+       ModelHandlerProvider python.CallableSource 
`beam:"model_handler_provider"`
+       // ModelURI indicates the model path to be used for Sklearn Model 
Handler.
+       ModelURI string `beam:"model_uri"`
+}
+
+// Sklearn provides inference over a SklearnModelHandler.
+// ModelURI is the required parameter indicating the path to the sklearn model.
+// This wrapper doesn't work for keyed input PCollection.
+//
+// Example:
+//
+//             inputRow := [][]int64{{0, 0}, {1, 1}}
+//         input := beam.CreateList(s, inputRow)
+//         modelURI = gs://example.com/tmp/staged/sklearn_model
+//             predictions := inference.Sklearn(s, modelURI, input, 
inference.WithExpansionAddr(expansionAddr))
+func Sklearn(s beam.Scope, modelUri string, col beam.PCollection, opts 
...sklearnConfigOption) beam.PCollection {
+       s.Scope("xlang.inference.Sklearn")
+
+       cfg := sklearnConfig{}
+       for _, opt := range opts {
+               opt(&cfg)
+       }
+       cfg.kwargs.ModelHandlerProvider = 
python.CallableSource("apache_beam.ml.inference.sklearn_inference.SklearnModelHandlerNumpy")
+       cfg.kwargs.ModelURI = modelUri
+       return runInference[sklearnKwargs](s, col, cfg.args, cfg.kwargs, 
cfg.expansionAddr)
+}
+
+func runInference[Kwargs any](s beam.Scope, col beam.PCollection, a 
argsStruct, k Kwargs, addr string) beam.PCollection {
+       expansionAddr := addr
+       if expansionAddr == "" {
+               expansionAddr = 
xlangx.UseAutomatedPythonExpansionService(python.ExpansionServiceModule)
+       }
+       pet := python.NewExternalTransform[argsStruct, 
Kwargs]("apache_beam.ml.inference.base.RunInference.from_callable")
+       pet.WithKwargs(k)
+       pet.WithArgs(a)
+       pl := beam.CrossLanguagePayload(pet)
+       namedInput := map[string]beam.PCollection{xlang.SetOutputCoder: col}

Review Comment:
   Why is the output coder being added to the inputs rather than the outputs? 
That seems very odd.
   
   As discussed, I thought we were going to add this as an alternative to the 
"unnamed outputs" and then translate that to unnamed downstream.



##########
sdks/go/pkg/beam/transforms/xlang/inference/inference.go:
##########
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package inference has the cross language implementation of RunInference API 
implemented in Python SDK.
+// An exapnsion service for python external transforms can be started by 
running
+//
+//     $ python -m apache_beam.runners.portability.expansion_service_main -p 
$PORT_FOR_EXPANSION_SERVICE
+package inference
+
+import (
+       "reflect"
+
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/xlangx"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/xlang"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/xlang/python"
+)
+
+func init() {
+       beam.RegisterType(reflect.TypeOf((*sklearnConfig)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*argsStruct)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*sklearnKwargs)(nil)).Elem())
+       beam.RegisterType(reflect.TypeOf((*PredictionResult)(nil)).Elem())
+}
+
+var outputT = reflect.TypeOf((*PredictionResult)(nil)).Elem()
+
+// PredictionResult represents the result of a prediction obtained from 
Python's RunInference API.
+type PredictionResult struct {
+       Example   []int64 `beam:"example"`
+       Inference int32   `beam:"inference"`
+}
+
+type sklearnConfig struct {
+       kwargs        sklearnKwargs
+       args          argsStruct
+       expansionAddr string
+}
+
+type sklearnConfigOption func(*sklearnConfig)
+
+// WithArgs set arguments for the sklearn inference transform parameters
+func WithArgs(args []string) sklearnConfigOption {

Review Comment:
   Since this is sklearn specific, we should probably call this 
`WithSklearnArgs`, assuming not all the models use "sklearn".



##########
sdks/go/pkg/beam/core/runtime/xlangx/namespace.go:
##########
@@ -100,7 +100,22 @@ func addNamespace(t *pipepb.PTransform, c 
*pipepb.Components, namespace string)
                }
        }
 
-       // c.Transforms = make(map[string]*pipepb.PTransform)
+       // update component coderIDs for other coders not present in t.Inputs, 
t.Outputs
+       for id, coder := range c.GetCoders() {
+               if _, exists := idMap[id]; exists {
+                       continue
+               }
+               var updatedComponentCoderIDs []string
+               updatedComponentCoderIDs = append(updatedComponentCoderIDs, 
coder.ComponentCoderIds...)
+               for i, ccid := range coder.GetComponentCoderIds() {
+                       if _, exists := idMap[ccid]; exists {
+                               updatedComponentCoderIDs[i] = idMap[ccid]
+                       }
+               }
+               coder.ComponentCoderIds = updatedComponentCoderIDs
+

Review Comment:
   Remove the spare blank line.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to