lostluck commented on code in PR #23450:
URL: https://github.com/apache/beam/pull/23450#discussion_r992931406


##########
sdks/go/pkg/beam/transforms/xlang/external.go:
##########
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package xlang contains data structures required for python external 
transforms in a multilanguage pipeline.

Review Comment:
   I recommend having nothing in the transforms/xlang package, and put this 
into the python directory instead. It's all about python, and invoking python.
   
   I also recommend simply putting the python utilities into a directory 
parallel to dataframe instead of trying to impose a hierarchy.
   
   eg.
   ```
   transforms/
     xlang/
       python/
       dataframe/
   ```
   
   This also lets you avoid having "xlang.Python..." among other prefixes 
everywhere and instead have python.Callable at the other call sites.



##########
sdks/go/test/integration/transforms/xlang/dataframe/dataframe_test.go:
##########
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package dataframe
+
+import (
+       "flag"
+       "log"
+       "testing"
+
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow"
+       _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink"
+       _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
+       "github.com/apache/beam/sdks/v2/go/test/integration"
+)
+
+var expansionAddr string // Populate with expansion address labelled 
"python_transform".
+
+func checkFlags(t *testing.T) {
+       if expansionAddr == "" {
+               t.Skip("No python transform expansion address provided.")
+       }
+}
+
+func TestDataframe(t *testing.T) {
+       integration.CheckFilters(t)
+       checkFlags(t)
+       p := DataframeTransform(expansionAddr)
+       ptest.RunAndValidate(t, p)
+}
+
+func TestMain(m *testing.M) {
+       flag.Parse()
+       beam.Init()
+
+       services := integration.NewExpansionServices()
+       defer func() { services.Shutdown() }()
+       addr, err := services.GetAddr("python_transform")

Review Comment:
   I'm assuming the initialization of `python_transform` is in another PR?
   
   The portable tests skip this:
   
   ```
   2022/10/05 16:08:29 skipping missing expansion service: expansion service 
labeled "python_transform" not found: no --expansion_jar or --expansion_addr 
flag provided with label "python_transform"
   === RUN   TestDataframe
       dataframe_test.go:35: No python transform expansion address provided.
   ```



##########
sdks/go/pkg/beam/transforms/xlang/external.go:
##########
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package xlang contains data structures required for python external 
transforms in a multilanguage pipeline.
+package xlang
+
+import (
+       "fmt"
+       "io"
+       "reflect"
+
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/util/reflectx"
+)
+
+const (
+       pythonCallableUrn = "beam:logical_type:python_callable:v1"
+)
+
+var (
+       pcsType        = reflect.TypeOf((*PythonCallableSource)(nil)).Elem()
+       pcsStorageType = reflectx.String
+)
+
+func init() {
+       beam.RegisterType(pcsType)
+       beam.RegisterSchemaProviderWithURN(pcsType, 
&PythonCallableSourceProvider{}, pythonCallableUrn)
+}
+
+// PythonCallableSource is a wrapper object storing a Python function 
definition
+// that can be evaluated to Python callables in Python SDK.
+//
+// The snippet of Python code can be a valid Python expression such as
+//    lambda x: x * x
+//       str.upper
+// a fully qualified name such as
+//    math.sin
+// or a complete multi-line function or class definition such as
+//    def foo(x):
+//        ...
+//    class Foo:
+//        ...
+//
+// Any lines preceding the function definition are first evaluated to provide 
context in which to
+// define the function which can be useful to declare imports or any other 
needed values, e.g.
+//    import math
+//
+//    def helper(x):
+//        return x * x
+//
+//    def func(y):
+//        return helper(y) + y
+// in which case `func` would get applied to each element.
+type PythonCallableSource string
+
+// PythonCallableSourceProvider implement the SchemaProvider interface for 
logical types
+type PythonCallableSourceProvider struct{}

Review Comment:
   This type isn't  used outside this package, please unexport it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to