Hi All,
We've been working with a search-enabled bucket in Riak for a while now, and
off and on it has been giving us trouble. In the past this has been solved
by reindexing all the data, simply reading it and writing it back into
Riak. But even this is now failing on some input data. Any help/insight
would be greatly appreciated.
We are on riak 1.4
We have recently switched to riak python api 2.0
smrtv@fre-prod-svr15:~$ python
Python 2.7.3 (default, Aug 1 2012, 05:14:39)
[GCC 4.6.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import riak
>>> r = riak.RiakClient()
>>> b = r.bucket('ctv_tvdata')
>>> o = b.get('/data/v2/search_show/TMS.Show.9838380')
>>> o.data
{'type': 'show', 'expires': '9999999999', 'subject_name': 'Monsters vs.
Aliens', 'sub_type': 'Series', 'topic':
'__ref--/data/v2/topic/TMS.Show.9838380:r1384276501.854346', 'person':
'__None__', 'searchable_key': 'aliens vs monstersvsaliens monsters',
'date': '2013-11-23', 'sport': '__None__', 'genre': 'Children', 'id':
'/data/v2/search_show/TMS.Show.9838380'}
>>> o.store()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/riak/riak_object.py", line
281, in store
timeout=timeout)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 127, in wrapper
return self._with_retries(pool, thunk)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 69, in _with_retries
return fn(transport)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 125, in thunk
return fn(self, transport, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/riak/client/operations.py",
line 289, in put
timeout=timeout)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/http/transport.py",
line 144, in put
return self._parse_body(robj, response, [200, 201, 204, 300])
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/http/codec.py",
line 64, in _parse_body
self.check_http_code(status, expected_statuses)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/http/transport.py",
line 446, in check_http_code
(expected_statuses, status))
Exception: Expected status [200, 201, 204, 300], received 500
Using protocol buffers gives an Erlang riak_search_kv_hook,precommit,error:
>>> r = riak.RiakClent(protocol='pcb')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute 'RiakClent'
>>> r = riak.RiakClient(protocol='pcb')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/riak/client/__init__.py",
line 99, in __init__
self.protocol = protocol or 'http'
File "/usr/local/lib/python2.7/dist-packages/riak/client/__init__.py",
line 118, in _set_protocol
repr(self.PROTOCOLS))
ValueError: protocol option is invalid, must be one of ['http', 'https',
'pbc']
>>> r = riak.RiakClient(protocol='pbc')
>>> b = r.bucket('ctv_tvdata')
>>> o = b.get('/data/v2/search_show/TMS.Show.9838380')
>>> o.store()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/riak/riak_object.py", line
281, in store
timeout=timeout)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 127, in wrapper
return self._with_retries(pool, thunk)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 69, in _with_retries
return fn(transport)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 125, in thunk
return fn(self, transport, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/riak/client/operations.py",
line 289, in put
timeout=timeout)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/pbc/transport.py",
line 194, in put
MSG_CODE_PUT_RESP)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/pbc/connection.py",
line 43, in _request
return self._recv_msg(expect)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/pbc/connection.py",
line 55, in _recv_msg
raise RiakError(err.errmsg)
riak.RiakError: '{precommit_fail,\n {hook_crashed,\n
{riak_search_kv_hook,precommit,error,\n {badmatch,\n
[{{dict,3,16,16,8,80,48,\n
{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},\n
{{[],[],[],[],[],[],[],[],[],[],\n
[[<<"X-Riak-VTag">>,50,90,85,77,113,86,72,111,75,121,\n
86,89,72,118,114,103,70,70,114,55,88,52]],\n
[[<<"index">>]],\n [],\n
[[<<"X-Riak-Last-Modified">>|{1384,276502,759295}]],\n
[],[]}}},\n {riak_idx_doc,<<"ctv_tvdata">>,\n
<<"/data/v2/search_show/TMS.Show.9838380">>,\n
[{<<"date">>,<<"2013-11-23">>,[{<<"2013-11-23">>,[0]}]},\n
{<<"expires">>,<<"9999999999">>,\n
[{<<"9999999999">>,[0]}]},\n
{<<"genre">>,<<"Children">>,[{<<"Children">>,[0]}]},\n
{<<"id">>,<<"/data/v2/search_show/TMS.Show.9838380">>,\n
[{<<"/data/v2/search_show/TMS.Show.9838380">>,[0]}]},\n
{<<"person">>,<<"__None__">>,[{<<"__None__">>,[0]}]},\n
{<<"searchable_key">>,\n <<"aliens vs
monstersvsaliens monsters">>,\n
[{<<"monsters">>,[3]},\n {<<"vs">>,[1]},\n
{<<"aliens">>,[0]},\n
{<<"monstersvsaliens">>,[2]}]},\n
{<<"sport">>,<<"__None__">>,[{<<"__None__">>,[0]}]},\n
{<<"sub_type">>,<<"Series">>,[{<<"Series">>,[0]}]},\n
{<<"subject_name">>,<<"Monsters vs. Aliens">>,\n
[{<<"vs.">>,[1]},\n {<<"Monsters">>,[0]},\n
{<<"Aliens">>,[2]}]},\n
{<<"topic">>,\n
<<"__ref--/data/v2/topic/TMS.Show.9838380:r1384276501.854346">>,\n
[{<<"__ref--/data/v2/topic/TMS.Show.9838380:r1384276501.854346">>,\n
[0]}]},\n
{<<"type">>,<<"show">>,[{<<"show">>,[0]}]}],\n [],\n
[{<<"expires">>,<<"9999999999">>,[<<"9999999999">>]},\n
{<<"type">>,<<"show">>,[<<"show">>]}],\n
true}},\n {{dict,3,16,16,8,80,48,\n
{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},\n
{{[],[],[],[],[],[],[],[],[],[],\n
[[<<"X-Riak-VTag">>,54,99,78,89,53,77,108,102,82,57,\n
81,88,69,107,104,72,74,98,81,72,114,66]],\n
[[<<"index">>]],\n [],\n
[[<<"X-Riak-Last-Modified">>|{1384,276502,759064}]],\n
[],[]}}},\n {riak_idx_doc,<<"ctv_tvdata">>,\n
<<"/data/v2/search_show/TMS.Show.9838380">>,\n
[{<<"date">>,<<"2013-11-23">>,[{<<"2013-11-23">>,[0]}]},\n
{<<"expires">>,<<"9999999999">>,\n
[{<<"9999999999">>,[0]}]},\n
{<<"genre">>,<<"Children">>,[{<<"Children">>,[0]}]},\n
{<<"id">>,<<"/data/v2/search_show/TMS.Show.9838380">>,\n
[{<<"/data/v2/search_show/TMS.Show.9838380">>,[0]}]},\n
{<<"person">>,<<"__None__">>,[{<<"__None__">>,[0]}]},\n
{<<"searchable_key">>,\n <<"aliens vs
monstersvsaliens monsters">>,\n
[{<<"monsters">>,[3]},\n {<<"vs">>,[1]},\n
{<<"aliens">>,[0]},\n
{<<"monstersvsaliens">>,[2]}]},\n
{<<"sport">>,<<"__None__">>,[{<<"__None__">>,[0]}]},\n
{<<"sub_type">>,<<"Series">>,[{<<"Series">>,[0]}]},\n
{<<"subject_name">>,<<"Monsters vs. Aliens">>,\n
[{<<"vs.">>,[1]},\n {<<"Monsters">>,[0]},\n
{<<"Aliens">>,[2]}]},\n
{<<"topic">>,\n
<<"__ref--/data/v2/topic/TMS.Show.9838380:r1384276501.846692">>,\n
[{<<"__ref--/data/v2/topic/TMS.Show.9838380:r1384276501.846692">>,\n
[0]}]},\n
{<<"type">>,<<"show">>,[{<<"show">>,[0]}]}],\n [],\n
[{<<"expires">>,<<"9999999999">>,[<<"9999999999">>]},\n
{<<"type">>,<<"show">>,[<<"show">>]}],\n
true}}]}}}}'
Any search that would return a resource that acts this way similarly fails:
>>> r.fulltext_search('ctv_tvdata', "searchable_key:monstersvsaliens")
XXXXXXXXXXXXXXXXXXXXXXXX q: "searchable_key:monstersvsaliens"
index: "ctv_tvdata"
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 127, in wrapper
return self._with_retries(pool, thunk)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 69, in _with_retries
return fn(transport)
File "/usr/local/lib/python2.7/dist-packages/riak/client/transport.py",
line 125, in thunk
return fn(self, transport, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/riak/client/operations.py",
line 410, in fulltext_search
return transport.search(index, query, **params)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/pbc/transport.py",
line 443, in search
MSG_CODE_SEARCH_QUERY_RESP)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/pbc/connection.py",
line 43, in _request
return self._recv_msg(expect)
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/pbc/connection.py",
line 50, in _recv_msg
self._recv_pkt()
File
"/usr/local/lib/python2.7/dist-packages/riak/transports/pbc/connection.py",
line 71, in _recv_pkt
% len(nmsglen))
riak.RiakError: 'Socket returned short packet length 0 - expected 4'
I also see errors like this in my crash and error logs:
2013-11-18 23:49:49.705 [error] emulator Error in process <0.17776.914> on
node '[email protected]' with exit value:
{{badmatch,[{{dict,3,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[],[],[],[],[],[],[],[],[[<<11
bytes>>,52,75,67,79,55,114,90,110,69,77,50,53,121,67,112,76,85,83,113,74,115,104]],[[<<5
bytes>>]],[],[[<<20
bytes>>|{1384,346717,469237}]],[],[]}}},{riak_idx_doc,<<10 bytes>>,<<36
bytes>>,[{<<4 bytes>>,<<8 bytes>>,[{<<8 bytes>>,[0]}]},{<<7 bytes>>,<<10
bytes>>,[{<<10 bytes>>,[0]}]},{<<5 bytes>>,<<6 bytes>>,[{<<6
bytes>>,[0]}]},{<<2 bytes>>,<<36 bytes>>,[{<<36 bytes>>,[0]}]},{<<6
bytes>>,<<8 bytes>>,[{<<8 bytes>>,[0]}]},{<<14 bytes>>,<<45 bytes>>,[{<<7
bytes>>,[0]},{<<5 bytes>>,[1]},{<<9 bytes>>,[2]},{<<21 bytes>>,[3]}]},{<<5
bytes>>,<<3 bytes>>,[{<<3 bytes>>,[0]}]},{<<8 bytes>>,<<8 bytes>>,[{<<8
bytes>>,[0]}]},{<<12 bytes>>,<<23 bytes>>,[{<<5 bytes>>,[1]},{<<9
bytes>>,[2]},{<<7 bytes>>,[...
2013-11-18 23:49:49.834 [error] emulator Error in process <0.18173.914> on
node '[email protected]' with exit value:
{{badmatch,[{{dict,3,16,16,8,80,48,{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]},{{[],[],[],[],[],[],[],[],[],[],[[<<11
bytes>>,52,75,67,79,55,114,90,110,69,77,50,53,121,67,112,76,85,83,113,74,115,104]],[[<<5
bytes>>]],[],[[<<20
bytes>>|{1384,346717,469237}]],[],[]}}},{riak_idx_doc,<<10 bytes>>,<<36
bytes>>,[{<<4 bytes>>,<<8 bytes>>,[{<<8 bytes>>,[0]}]},{<<7 bytes>>,<<10
bytes>>,[{<<10 bytes>>,[0]}]},{<<5 bytes>>,<<6 bytes>>,[{<<6
bytes>>,[0]}]},{<<2 bytes>>,<<36 bytes>>,[{<<36 bytes>>,[0]}]},{<<6
bytes>>,<<8 bytes>>,[{<<8 bytes>>,[0]}]},{<<14 bytes>>,<<45 bytes>>,[{<<7
bytes>>,[0]},{<<5 bytes>>,[1]},{<<9 bytes>>,[2]},{<<21 bytes>>,[3]}]},{<<5
bytes>>,<<3 bytes>>,[{<<3 bytes>>,[0]}]},{<<8 bytes>>,<<8 bytes>>,[{<<8
bytes>>,[0]}]},{<<12 bytes>>,<<23 bytes>>,[{<<5 bytes>>,[1]},{<<9
bytes>>,[2]},{<<7 bytes>>,[...
My search schema currently looks like this:
%% Custom schema for our index
%% See: http://10.1.3.100:8090/display/REST/Search for some background on
%% how we index/search
{
schema,
[
{version, "1.1"},
{n_val, 3},
{default_op, "and"},
{default_field, "searchable_key"},
{analyzer_factory, {erlang, text_analyzers,
whitespace_analyzer_factory}}
],
[
%% main field for searching
{field, [
{name, "searchable_key"},
{type, string}
]},
%% In order to use filter queries to reduce the result set to
%% specific object 'types' or with 'expires' >= now, we need
%% to make these fields "inline".
{field, [
{name, "type"},
{type, string},
{inline, true}
]},
{field, [
{name, "expires"},
{type, string},
{inline, true}
]},
{field, [
{name, "likes_count"},
{type, string},
{padding_size, 10}
]},
{field, [
{name, "timestamp"},
{type, string},
{inline, true}
]},
%% Our catch all...
{dynamic_field, [
{name, "*"},
{type, string}
]}
%% Field names ending in "_text" are indexed as full text
%% DAVE: just keeping this paragraph for reference
%{dynamic_field, [
% {name, "*_text"},
% {type, string},
% {analyzer_factory, {erlang, text_analyzers,
% standard_analyzer_factory}}
%]},
%% The original catch all...
%% Everything else is a string
%{dynamic_field, [
%{name, "*"},
%{type, string},
%{analyzer_factory, {erlang, text_analyzers,
% whitespace_analyzer_factory}}
%]}
]
}.
I'm seeing this kind of thing in both my stage and production environments.
As far as I can tell my search index is corrupted but I'm not sure how
it's gotten this way. Again, any help is appreciated. What is wrong?
How can I fix it? What could have caused it?
Thanks,
Gabe
_______________________________________________
riak-users mailing list
[email protected]
http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com