robertwb commented on a change in pull request #13606:
URL: https://github.com/apache/beam/pull/13606#discussion_r560409255
##########
File path: model/pipeline/src/main/proto/beam_runner_api.proto
##########
@@ -1342,7 +1342,11 @@ message StandardArtifacts {
enum Roles {
// A URN for staging-to role.
// payload: ArtifactStagingToRolePayload
- STAGING_TO = 0 [(beam_urn) = "beam:artifact:role:staging_to:v1"];
+ STAGING_TO = 0 [(beam_urn) = "beam:artifact:role:staging_to:v1"];
+
+ // A URN for pypi-install-req role.
+ // payload: None
+ PYPI_INSTALL_REQ = 1 [(beam_urn) =
"beam:artifact:role:pypi_install_req:v1"];
Review comment:
Let's spell out "requirements" in full here rather than abbreviating it to "req".
##########
File path: sdks/go/pkg/beam/artifact/materialize.go
##########
@@ -69,41 +74,100 @@ func newMaterialize(ctx context.Context, endpoint string,
dependencies []*pipepb
return newMaterializeWithClient(ctx,
jobpb.NewArtifactRetrievalServiceClient(cc), dependencies, dest)
}
-func newMaterializeWithClient(ctx context.Context, client
jobpb.ArtifactRetrievalServiceClient, dependencies
[]*pipepb.ArtifactInformation, dest string) ([]*jobpb.ArtifactMetadata, error) {
+func newMaterializeWithClient(ctx context.Context, client
jobpb.ArtifactRetrievalServiceClient, dependencies
[]*pipepb.ArtifactInformation, dest string) ([]*pipepb.ArtifactInformation,
error) {
resolution, err := client.ResolveArtifacts(ctx,
&jobpb.ResolveArtifactsRequest{Artifacts: dependencies})
if err != nil {
return nil, err
}
- var md []*jobpb.ArtifactMetadata
+ var artifacts []*pipepb.ArtifactInformation
var list []retrievable
for _, dep := range resolution.Replacements {
path, err := extractStagingToPath(dep)
if err != nil {
return nil, err
}
- md = append(md, &jobpb.ArtifactMetadata{
- Name: path,
+ var filePayload pipepb.ArtifactFilePayload
+ if dep.TypeUrn != URNFileArtifact {
+ filePayload = pipepb.ArtifactFilePayload{
+ Path: path,
+ }
+ } else {
+ typePayload := pipepb.ArtifactFilePayload{}
+ if err := proto.Unmarshal(dep.TypePayload,
&typePayload); err != nil {
+ return nil, errors.Wrap(err, "failed to parse
artifact file payload")
+ }
+ filePayload = pipepb.ArtifactFilePayload{
+ Path: path,
+ Sha256: typePayload.Sha256,
+ }
+ }
+ newTypePayload, err := proto.Marshal(&filePayload)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to create artifact
type payload")
+ }
+ artifacts = append(artifacts, &pipepb.ArtifactInformation{
+ TypeUrn: URNFileArtifact,
+ TypePayload: newTypePayload,
+ RoleUrn: dep.RoleUrn,
+ RolePayload: dep.RolePayload,
})
+ rolePayload, err :=
proto.Marshal(&pipepb.ArtifactStagingToRolePayload{
+ StagedName: path,
+ })
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to create artifact
role payload")
+ }
list = append(list, &artifact{
client: client,
- dep: dep,
+ dep: &pipepb.ArtifactInformation{
+ TypeUrn: dep.TypeUrn,
+ TypePayload: dep.TypePayload,
+ RoleUrn: URNStagingTo,
+ RolePayload: rolePayload,
+ },
})
}
- return md, MultiRetrieve(ctx, 10, list, dest)
+ return artifacts, MultiRetrieve(ctx, 10, list, dest)
+}
+
+// Used for generating unique IDs. We assign uniquely generated names to
staged files without staging names.
+var idCounter uint64
+
+func generateId() string {
+ id := atomic.AddUint64(&idCounter, 1)
+ return strconv.FormatUint(id, 10)
}
func extractStagingToPath(artifact *pipepb.ArtifactInformation) (string,
error) {
- if artifact.RoleUrn != URNStagingTo {
- return "", errors.Errorf("Unsupported artifact role %s",
artifact.RoleUrn)
+ var stagedName string
+ if artifact.RoleUrn == URNStagingTo {
+ role := pipepb.ArtifactStagingToRolePayload{}
+ if err := proto.Unmarshal(artifact.RolePayload, &role); err !=
nil {
+ return "", err
+ }
+ stagedName = role.StagedName
+ } else {
Review comment:
Guard this branch with a check that artifact.TypeUrn is URNFileArtifact, and
add a final else that returns an error?
##########
File path: sdks/go/pkg/beam/artifact/materialize.go
##########
@@ -69,41 +74,100 @@ func newMaterialize(ctx context.Context, endpoint string,
dependencies []*pipepb
return newMaterializeWithClient(ctx,
jobpb.NewArtifactRetrievalServiceClient(cc), dependencies, dest)
}
-func newMaterializeWithClient(ctx context.Context, client
jobpb.ArtifactRetrievalServiceClient, dependencies
[]*pipepb.ArtifactInformation, dest string) ([]*jobpb.ArtifactMetadata, error) {
+func newMaterializeWithClient(ctx context.Context, client
jobpb.ArtifactRetrievalServiceClient, dependencies
[]*pipepb.ArtifactInformation, dest string) ([]*pipepb.ArtifactInformation,
error) {
resolution, err := client.ResolveArtifacts(ctx,
&jobpb.ResolveArtifactsRequest{Artifacts: dependencies})
if err != nil {
return nil, err
}
- var md []*jobpb.ArtifactMetadata
+ var artifacts []*pipepb.ArtifactInformation
var list []retrievable
for _, dep := range resolution.Replacements {
path, err := extractStagingToPath(dep)
if err != nil {
return nil, err
}
- md = append(md, &jobpb.ArtifactMetadata{
- Name: path,
+ var filePayload pipepb.ArtifactFilePayload
+ if dep.TypeUrn != URNFileArtifact {
Review comment:
Nit: I'd put this case, which is kind of like a default/everything else,
last. Also, do we know this always works, or should we throw an error on
unknown types?
##########
File path: sdks/go/pkg/beam/artifact/materialize.go
##########
@@ -172,7 +236,7 @@ func writeChunks(stream
jobpb.ArtifactRetrievalService_GetArtifactClient, w io.W
return nil
}
-func legacyMaterialize(ctx context.Context, endpoint string, rt string, dest
string) ([]*jobpb.ArtifactMetadata, error) {
+func legacyMaterialize(ctx context.Context, endpoint string, rt string, dest
string) ([]*pipepb.ArtifactInformation, error) {
Review comment:
Do you know what the state is of getting rid of the legacy protocol
altogether?
##########
File path: sdks/go/pkg/beam/artifact/materialize.go
##########
@@ -69,41 +74,100 @@ func newMaterialize(ctx context.Context, endpoint string,
dependencies []*pipepb
return newMaterializeWithClient(ctx,
jobpb.NewArtifactRetrievalServiceClient(cc), dependencies, dest)
}
-func newMaterializeWithClient(ctx context.Context, client
jobpb.ArtifactRetrievalServiceClient, dependencies
[]*pipepb.ArtifactInformation, dest string) ([]*jobpb.ArtifactMetadata, error) {
+func newMaterializeWithClient(ctx context.Context, client
jobpb.ArtifactRetrievalServiceClient, dependencies
[]*pipepb.ArtifactInformation, dest string) ([]*pipepb.ArtifactInformation,
error) {
resolution, err := client.ResolveArtifacts(ctx,
&jobpb.ResolveArtifactsRequest{Artifacts: dependencies})
if err != nil {
return nil, err
}
- var md []*jobpb.ArtifactMetadata
+ var artifacts []*pipepb.ArtifactInformation
var list []retrievable
for _, dep := range resolution.Replacements {
path, err := extractStagingToPath(dep)
if err != nil {
return nil, err
}
- md = append(md, &jobpb.ArtifactMetadata{
- Name: path,
+ var filePayload pipepb.ArtifactFilePayload
+ if dep.TypeUrn != URNFileArtifact {
Review comment:
Actually, this might be clearer constructing filePayload unconditionally
based on Path, and then iff TypeUrn is URNFileArtifact, additionally setting
the Sha256 attribute, if any.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org