[
https://issues.apache.org/jira/browse/BEAM-7860?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Niels Stender updated BEAM-7860:
--------------------------------
Description:
When a kind contains keys of mixed types (string names as well as integer
IDs), v1new ReadFromDatastore may return duplicate items when the query is
split. The example below writes 3 entities but reads back 4 records.
{code:python}
# Reproduction for BEAM-7860: write three entities whose keys mix string
# names with an integer ID, read them back with a split query, and count
# the records that come out.
import apache_beam as beam
from apache_beam.io.gcp.datastore.v1new.types import Key, Entity, Query
from apache_beam.io.gcp.datastore.v1new import datastoreio

# Shared by every key and by the query below; replace the project before running.
config = dict(project='your-google-project', namespace='test')


def test_mixed():
    """Write three mixed-key entities, read them back, and expect three rows.

    Per the report, the final assertion fails because the read returns
    4 records instead of 3.

    Raises:
        AssertionError: if the number of lines read back is not 3.
    """
    keys = [
        Key([u'mixed', u'10038260-iperm_eservice'], **config),
        # Integer key between two string keys (plain literal, valid on
        # both Python 2 and Python 3).
        Key([u'mixed', 4812224868188160], **config),
        Key([u'mixed', u'99152975-pointshop'], **config),
    ]
    # `Entity` is imported by name; no `types` module is in scope here.
    entities = [Entity(key=key) for key in keys]

    with beam.Pipeline() as p:
        (p
         | beam.Create(entities)
         | datastoreio.WriteToDatastore(project=config['project']))

    # Query the same project/namespace the keys were created with.
    query = Query(kind=u'mixed', **config)
    with beam.Pipeline() as p:
        (p
         | datastoreio.ReadFromDatastore(query=query, num_splits=4)
         | beam.io.WriteToText('tmp.txt', num_shards=1,
                               shard_name_template=''))

    # Close the output file deterministically instead of leaking the handle.
    with open('tmp.txt') as f:
        items = f.read().strip().split('\n')
    assert len(items) == 3, 'incorrect number of items'
{code}
was:
jjj
{code:java}
// code placeholder
import apache_beam as beam
from apache_beam.io.gcp.datastore.v1new.types import Key, Entity, Query
from apache_beam.io.gcp.datastore.v1new import datastoreio
config = dict(project='your-google-project', namespace='test')
def test_mixed():
keys = [
Key([u'mixed', u'10038260-iperm_eservice'], **config),
Key([u'mixed', 4812224868188160L], **config),
Key([u'mixed', u'99152975-pointshop'], **config)
]
entities = map(lambda key: types.Entity(key=key), keys)
with beam.Pipeline() as p:
(p
| beam.Create(entities)
| datastoreio.WriteToDatastore(project=config['project'])
)
query = Query(kind=u'mixed', **args)
with beam.Pipeline() as p:
(p
| datastoreio.ReadFromDatastore(query=query, num_splits=4)
| beam.io.WriteToText('tmp.txt', num_shards=1,
shard_name_template='')
)
items = open('tmp.txt').read().strip().split('\n')
assert len(items) == 3, 'incorrect number of items'
{code}
> v1new ReadFromDatastore splits incorrectly in the presence of mixed type keys
> -----------------------------------------------------------------------------
>
> Key: BEAM-7860
> URL: https://issues.apache.org/jira/browse/BEAM-7860
> Project: Beam
> Issue Type: Bug
> Components: io-python-gcp
> Affects Versions: 2.13.0
> Environment: Python 2.7
> Reporter: Niels Stender
> Priority: Major
>
> In the presence of mixed type keys, v1new ReadFromDatastore may return
> duplicate items. The attached example returns 4 records, not the expected 3.
>
> {code:java}
> // code placeholder
> import apache_beam as beam
> from apache_beam.io.gcp.datastore.v1new.types import Key, Entity, Query
> from apache_beam.io.gcp.datastore.v1new import datastoreio
> config = dict(project='your-google-project', namespace='test')
> def test_mixed():
> keys = [
> Key([u'mixed', u'10038260-iperm_eservice'], **config),
> Key([u'mixed', 4812224868188160L], **config),
> Key([u'mixed', u'99152975-pointshop'], **config)
> ]
> entities = map(lambda key: types.Entity(key=key), keys)
> with beam.Pipeline() as p:
> (p
> | beam.Create(entities)
> | datastoreio.WriteToDatastore(project=config['project'])
> )
> query = Query(kind=u'mixed', **args)
> with beam.Pipeline() as p:
> (p
> | datastoreio.ReadFromDatastore(query=query, num_splits=4)
> | beam.io.WriteToText('tmp.txt', num_shards=1,
> shard_name_template='')
> )
> items = open('tmp.txt').read().strip().split('\n')
> assert len(items) == 3, 'incorrect number of items'
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.14#76016)