What do you all think?
My current code for this is below. So far it only returns the first word to
the right of each occurrence (i.e. the word that immediately follows) for the
two words being compared (win and won); the output is shown after the working
Python code. You can see in the output that the two words share many of their
following words. Next I have to normalize the bigger distribution down so it is
comparable to the smaller one, ignore rare/common words, then see how many of
the total are shared, and get a confidence based on how many experiences
(occurrences) a word has relative to how many types of words it has competing.
Every word needs to be compared to every other word (~5K words gives
5,000 * 4,999 / 2 = 12,497,500 relational connections/checks), but this update
only has to be done to a small net every 1 MB or so.
def following_word_counts(text, target, max_word_len=50):
    """Count the words that immediately follow each occurrence of *target*.

    An occurrence is *target* surrounded by single spaces (``' target '``).
    The "following word" is the run of characters after that trailing space,
    up to (not including) the next space, capped at *max_word_len* characters.
    Only the space character ends a word, so newlines and punctuation stay
    part of it, and two spaces in a row yield the empty string as a word.

    Args:
        text: The corpus to scan.
        target: The word whose right-hand neighbours are tallied.
        max_word_len: Maximum number of characters kept for a following word.

    Returns:
        A list of ``[word, count]`` pairs sorted by ascending count; words
        with equal counts keep the order in which they were first seen.
    """
    marker = ' ' + target + ' '
    tallies = {}  # dicts keep insertion order, so first-seen order is retained
    hit = text.find(marker)
    while hit != -1:
        start = hit + len(marker)
        limit = start + max_word_len
        stop = text.find(' ', start, limit)
        if stop == -1:
            # No space within the cap (or the text ended): take what is there.
            # Slicing clamps at the end of the text, so this cannot raise.
            stop = limit
        word = text[start:stop]
        tallies[word] = tallies.get(word, 0) + 1
        # Advance by one character only, so occurrences that share a space
        # (e.g. ' win win ') are both found.
        hit = text.find(marker, hit + 1)
    # sorted() is stable, so ties stay in first-seen order.
    return sorted(([word, count] for word, count in tallies.items()),
                  key=lambda pair: pair[1])


def main():
    """Print the following-word tallies for 'win' and 'won' in enwik8.txt."""
    # NOTE(review): latin-1 never fails to decode, but any multi-byte UTF-8
    # characters in the file will show up as mojibake - confirm this is wanted.
    with open('enwik8.txt', 'r', encoding='latin-1') as source:
        input2 = source.read()
    word1words = following_word_counts(input2, 'win')
    word2words = following_word_counts(input2, 'won')
    print(word1words)
    print(word2words)


if __name__ == '__main__':
    main()
# Next: get the number of words that follow and the number of types of words
# that follow; confidence = total / (types * 10) + another such, averaged.
# Then search one word's next words against the other's, and if they match
# tally 1 point; relation = shared / one's total.
*sorted backwards* (ascending by count, so the most frequent words come last):
[["Alabama's", 1], ['support', 1], ['it,', 1], ['[[Alcestis]],', 1],
['foreshadowed', 1], ['Emmy', 1], ['[[rally', 1], ['diplomatic', 1],
['Eurovision,', 1], ['FDA', 1], ['+4-1=6.', 1], ['2–0,', 1],
["Atalanta's", 1], ['[[NCAA', 1], ["Albert's", 1], ["golf's", 1], ['public',
1], ['8', 1], ['world', 1], ['Sunday,', 1], ['club|300th', 1], ['politically',
1], ['militarily."\n*[[1968]]', 1], ["wars'')\n*B.", 1], ["wars''", 1],
['21', 1], ['(also', 1], ['without', 1], ['Atlanta', 1], ['evened', 1],
['pretty', 1], ['away', 1], ['â\x80\x94', 1], ['22', 1], ['16', 1], ['15', 1],
['points.', 1], ['(a', 1], ['would', 1], ['after', 1], ['(or', 1], ['should',
1], ['put', 1], ['"between', 1], ['13', 1], ['behind', 1], ['8-3.', 1],
['Big', 1], ['huge', 1], ['NCAA,', 1], ['who', 1], ['total', 1], ['=', 1],
['enough', 1], ['greater', 1], ['until', 1], ['99', 1], ['games', 1], ['when',
1], ['$10.', 1], ['$10', 1], ['several', 1], ['substantial', 1], ['redress',
1], ['have', 1], ['prizes.', 1], ['17-7,', 1], ['17-16,', 1],
['23-12.\n\n===1966-73:', 1], ['24-21.', 1], ['set', 1], ['20-13.', 1],
['defections"', 1], ['6', 1], ['brought', 1], ['club|Young,', 1],
['prestigious', 1], ['supporters', 1], ['land', 1], ['card', 1], ['office.',
1], ['none', 1], ['Daytona', 1], ['4', 1], ['11', 1], ['7', 1], ['he', 1],
['again', 1], ['wars', 1], ['low.', 1], ['low).\n*', 1], ['high,', 1],
['low),', 1], ['popularity', 1], ['24-20.', 1], ['indeed', 1], ['dive', 1],
['meets.', 1], ['it!"', 1], ['30', 1], ['anything', 1], ['10', 1],
['victory', 1], ['contracts.', 1], ['per', 1], ['7-1.', 1], ['them', 1],
['anything.', 1], ['John', 1], ['championships,', 1], ['plurality', 1],
['seats,', 1], ['gold;', 1], ['gold', 1], ['Southern', 1], ['competitions', 1],
['[[heavyweight]]', 1], ['came', 1], ['8-7.\n*In', 1], ['3-1', 1], ['doubles',
1], ['outright', 1], ['seasons.', 1], ['inspires', 1], ['Falklands', 1],
['[[Best', 1], ['while', 1], ['long-distance', 1], ['additional', 1],
['(less,', 1], ['least', 1], ['but', 1], ['auctions,', 1], ['far', 1],
['[[Academy', 1], ['(see', 1], ['league', 1], ['my', 1], ['(increasing', 1],
['once', 1], ['election.', 1], ['Brünnhilde', 1],
['elections.\n\n==Notes==\n#[http://news.bbc.co.uk/1/hi', 1], ['games,', 1],
['last', 1], ['favour', 1], ['pivous', 1], ['Malayan', 1], ['1st', 1],
['consecutive', 1], ['The', 1], ['Private', 1], ['Jews.', 1], ['national', 1],
['local', 1], ['prizes', 1], ['only', 2], ['Le', 2], ['independence', 2],
['him', 2], ['since', 2], ['her', 2], ['are', 2], ['even', 2], ['it', 2],
['World', 2], ['business', 2], ['-', 2], ['one', 2], ['championships', 2],
['votes', 2], ['if', 2], ['streak', 2], ['converts', 2], ['contests', 2], ['5',
2], ['[[Super', 2], ['', 2], ['this', 2], ['those', 2], ['all', 3], ['its', 3],
['seats', 3], ['many', 3], ['three', 3], ['two', 3], ['or', 3], ['2', 3],
['Game', 3], ['is', 3], ['your', 3], ['back-to-back', 3], ['3', 3], ['either',
3], ['that', 4], ['another', 4], ['four', 4], ['with', 4], ['what', 4],
['back', 4], ['on', 5], ['to', 5], ['his', 6], ['more', 7], ['any', 7],
['both', 7], ['was', 7], ['of', 8], ['for', 9], ['by', 9], ['against', 11],
['an', 12], ['and', 13], ['at', 15], ['their', 17], ['in', 31], ['over', 45],
['a', 77], ['the', 211]]
[["Alabama's", 1], ['entirely', 1], ['77', 1], ['6-7,', 1], ['if', 1],
["1995's", 1], ['using', 1], ['given', 1], ['re-election', 1], ['UN', 1],
['55.4%', 1], ['competition', 1], ['third', 1], ['hands', 1], ['Grand', 1],
['one.', 1], ['very', 1], ['(11–7,', 1], ['England', 1], ['36.9%',
1], ['40.8%', 1], ['convincingly', 1], ['(some', 1], ['\na', 1], ['immense',
1], ['himself', 1], ['Cyprus', 1], ['was', 1], ['(winning', 1], ['national',
1], ['these', 1], ['bronze,', 1], ['two-zip"', 1], ['city-council', 1],
['[[List', 1], ['41-10', 1], ['16', 1], ['41', 1], ['25', 1], ['39', 1],
['Turn-based', 1], ['worldwide', 1], ['[[Golden', 1], ['under', 1], ['Chekhov',
1], ['2-1.\nThe', 1], ['forced', 1], ['decisively.', 1], ['postive', 1], ['17',
1], ["it's", 1], ['your', 1], ['family)\n\n*Order', 1], ['among', 1],
['Spears', 1], ['those', 1], ['22%', 1], ['18%.', 1], ['33', 1], ['6,', 1],
['1', 1], ["Bulgaria's", 1], ['universal', 1], ['4-0', 1], ['4-3', 1],
['independence', 1], ['right', 1], ['is', 1], ['genuine', 1], ['116;', 1],
['111;', 1], ['114;', 1], ['116', 1], ['can', 1], ['[[The', 1], ['2-1.', 1],
['48%', 1], ['power', 1], ['Olympic', 1], ['seventy-five', 1], ['middleweight',
1], ['titles', 1], ['againist', 1], ['each', 1], ['easily,', 1], ['20-17', 1],
['[[AFL]]', 1], ['playoff', 1], ['5-1,', 1], ['10â\x80\x939,', 1], ['favour',
1], ['during', 1], ['extracts,', 1], ['44.7%', 1], ['once.', 1], ['Thornton',
1], ['barely', 1], ['[[united', 1], ['as', 1], ['Wimbledon', 1],
['8-6.\n\nBorg', 1], ['what', 1], ['four)', 1], ['7', 1], ['6).\n\n==', 1],
['comfortably', 1], ['relection', 1], ['486', 1], ['102', 1], ['75-64.\n\n==',
1], ['13', 1], ['38-18)\n*', 1], ['36-14)\n*', 1], ['26-18)\n*', 1],
['44-28)\n*', 1], ['48-28)\n\n===Most', 1], ['50-6)\n*', 1],
['58-4)\n\n===Most', 1], ['acceptance', 1], ['sizeable', 1], ['54.3%', 1],
['single-candidate', 1], ['52.5', 1], ['45.6', 1], ['free', 1], ['53.5%', 1],
['seven.', 1], ['easily.\n\nHowever,', 1], ['administrative', 1], ['despite',
1], ['Commonwealth', 1], ['rave', 1], ['four,', 1], ['47%', 1], ['-', 1],
['fewer', 1], ['95', 1], ['99', 1], ['out,', 1], ['handily', 1],
['[[Litchfield', 1], ['38-14.\n\nWithout', 1], ['nine', 1], ['23-20,', 1],
['37-21.', 1], ['21', 1], ['67', 1], ['81', 1], ['96', 1], ['15,000', 1],
['successive', 1], ['[[Regional', 1], ['ILM', 1], ['major', 1], ['acceptance,',
1], ['everything.\n\nThe', 1], ['4-0.', 1], ['([[February', 1], ["France's",
1], ['early', 1], ['Delaware', 1], ['100', 1], ['one-third', 1], ['Dre', 1],
['49%', 1], ['"Golden', 1], ['(put', 1], ['6', 1], ['other', 1], ["'Best",
1], ['109', 1], ['[[FA', 1], ['election', 1], ['Grammy', 1], ['seventy', 1],
['reelection', 1], ['outright', 1], ['7-1,', 1], ['69', 1], ['Elton', 1],
['Eminem', 1], ['wars', 1], ['twelve', 1], ['40%', 1], ['overall', 1],
['(with', 1], ['had', 1], ['large', 1], ['61', 1], ['53', 1], ['landslide', 1],
['gold,', 1], ['Osgiliath.', 1], ['best', 1], ['3).', 1], ['close', 1],
['influence', 1], ['three.', 1], ['$1.2', 1], ['37', 1], ['directing', 1],
['independence.', 1], ['modern', 1], ['popular', 1], ['1-2-3.\n\nThe', 1],
['again', 1], ['19%', 1], ['84', 1], ['31', 1], ['37.', 1], ['68%', 1], ['56',
1], ['71', 1], ['76', 1], ['awards.\n\n==History==\n===Prior', 1], ['125', 1],
['1-0,', 1], ['anything).', 1], ['2-0.', 1], ['3-2', 1], ['49', 1], ['Her', 1],
['over—including', 1], ['convincingly.', 1], ['outright)', 1],
['about', 1], ['power;', 1], ['big', 1], ['67%', 1], ["Haiti's", 1], ['55.',
1], ['==\n(Paragraph', 1], ['57%', 1], ['Connick', 1], ['game', 1],
['convincing', 1], ['172', 1], ['150', 1], ['but', 1], ['nominal', 1], ['23',
1], ['"Campaign', 1], ['[[London', 1], ['Bogart', 1], ['legislative', 1],
['Kentucky', 1], ['some', 1], ['34%', 1], ['shares', 1], ['without', 1], ['24',
1], ['19.', 1], ['12,', 1], ['10.', 1], ['solely', 1], ['34-31', 1], ['45-37',
1], ['without."', 1], ['02.%', 1], ['0.6%', 1], ['3', 1], ['agreement',
1], ['strong', 1], ['key', 1], ['no', 2], ['after', 2], ['much', 2],
['[[Academy', 2], ['between', 2], ['[[World', 2], ['World', 2], ['important',
2], ['first', 2], ['thirteen', 2], ['just', 2], ['multiple', 2], ['prizes', 2],
['pre-term', 2], ['[[Hugo', 2], ['or', 2], ['through', 2], ['18', 2], ['back',
2], ['because', 2], ['4-2', 2], ['29', 2], ['eleven', 2], ['12', 2],
['support', 2], ['significant', 2], ['', 2], ['57', 2], ['44-6)\n*', 2], ['63',
2], ['presidential', 2], ['2', 2], ['international', 2], ['4', 2], ['them', 2],
['9', 2], ['8', 2], ['further', 2], ['Trudeau', 2], ['critical', 2], ['wide',
2], ['acclaim', 2], ['widespread', 2], ['before', 2], ['second', 2], ['enough',
2], ['approval', 2], ['ten', 2], ['consecutive', 2], ['easily', 2], ['2-1', 2],
['1-0', 2], ['plaudits', 2], ['control', 3], ['14', 3], ['gold', 3],
['promotion', 3], ['games', 3], ['awards', 3], ['battles', 3], ['[[Super', 3],
['world', 3], ['recognition', 3], ['from', 3], ['either', 3], ['that', 3],
['98', 3], ['5', 3], ['"Most', 3], ['10', 3], ['almost', 3], ['The', 3],
['elections', 3], ['six', 4], ['victories', 4], ['27', 4], ['for', 4], ['fame',
4], ['most', 4], ['another', 4], ['Best', 4], ['seats', 4], ['out.', 4],
['back-to-back', 4], ['out', 4], ['eight', 5], ['it', 5], ['her', 5], ['every',
5], ['this', 6], ['only', 6], ['seven', 6], ['five', 6], ['and', 7],
['numerous', 7], ['against', 7], ['Game', 8], ['one', 9], ['with', 9], ['both',
9], ['on', 9], ['many', 10], ['more', 12], ['four', 14], ['at', 15], ['over',
15], ['several', 15], ['all', 18], ['its', 18], ['three', 19], ['him', 23],
['two', 26], ['his', 35], ['their', 38], ['in', 43], ['an', 48], ['by', 103],
['a', 188], ['the', 679]]
------------------------------------------
Artificial General Intelligence List: AGI
Permalink:
https://agi.topicbox.com/groups/agi/T192296c5c5a27230-M5a91288d0affffacf21d095b
Delivery options: https://agi.topicbox.com/groups/agi/subscription