must sleep.....works.....
# Compare how similarly two context words (' win ' and ' won ') are used in a
# text corpus, by looking at which words directly follow each of them.
#
# Steps:
#   1. Count every word that immediately follows each marker.
#   2. Discard follower words seen more than 50 times after a marker
#      (very frequent followers).
#   3. Print the share of the smaller context's follower occurrences that
#      land on words also seen after the other context.
from collections import Counter


def count_following_words(text, marker, max_word_len=50):
    """Count the words that directly follow each occurrence of ``marker``.

    A "word" is every character after the marker up to (not including) the
    next space, capped at ``max_word_len`` characters.  Only a space ends a
    word; other whitespace such as newlines is kept as part of it.  An empty
    word (marker immediately followed by a space) is counted too.

    Args:
        text: The corpus to scan.
        marker: The exact substring to look for, e.g. ``' win '``.
        max_word_len: Maximum number of characters taken for a follower word.

    Returns:
        A ``Counter`` mapping follower word -> number of occurrences.
    """
    counts = Counter()
    start = text.find(marker)
    while start != -1:
        word_start = start + len(marker)
        # Slicing never raises, so a marker at the very end of the text is
        # handled without an IndexError.
        window = text[word_start:word_start + max_word_len]
        counts[window.partition(' ')[0]] += 1
        # Advance by one character only, so overlapping matches such as
        # ' win win ' (which share a space) are both found.
        start = text.find(marker, start + 1)
    return counts


def drop_frequent(counts, max_count=50):
    """Return a copy of ``counts`` without words seen more than ``max_count`` times."""
    return {word: count for word, count in counts.items() if count <= max_count}


def shared_follower_score(counts_a, counts_b):
    """Score how much two follower distributions overlap.

    The context with the smaller total is scaled up by
    ``bigger_total / smaller_total``; the scaled counts of the words that
    also occur in the other context are summed and divided by the bigger
    total.

    Args:
        counts_a: Mapping of follower word -> count for the first context.
        counts_b: Mapping of follower word -> count for the second context.

    Returns:
        The overlap score as a float, or ``0.0`` when either context has no
        followers (which would otherwise be a division by zero).
    """
    sum_a = sum(counts_a.values())
    sum_b = sum(counts_b.values())
    if not sum_a or not sum_b:
        return 0.0

    # "Bigger" and "smaller" are decided once, by total count, and each
    # word stays attached to its own count via the mapping.
    if sum_a > sum_b:
        bigger, bigger_sum = counts_a, sum_a
        smaller, smaller_sum = counts_b, sum_b
    else:
        bigger, bigger_sum = counts_b, sum_b
        smaller, smaller_sum = counts_a, sum_a

    norm = bigger_sum / smaller_sum
    # The original expression multiplied and then divided by the bigger
    # context's count for the word, which reduces to the scaled smaller count.
    shared = sum(count * norm for word, count in smaller.items() if word in bigger)
    return shared / bigger_sum


def main(path='enwik8.txt'):
    """Read the corpus at ``path`` and print the ' win ' / ' won ' overlap score."""
    # latin-1 maps every byte to exactly one character, so decoding cannot fail.
    with open(path, 'r', encoding='latin-1') as corpus:
        text = corpus.read()

    win_followers = drop_frequent(count_following_words(text, ' win '))
    won_followers = drop_frequent(count_following_words(text, ' won '))
    print(shared_follower_score(win_followers, won_followers))


if __name__ == '__main__':
    main()
# get the number of words that follow and the number of types of words that
# follow; confidence = total / (types * 10) + another such, averaged
------------------------------------------
Artificial General Intelligence List: AGI
Permalink:
https://agi.topicbox.com/groups/agi/T01fa5e447808d368-M900987a57361d87a494039ad
Delivery options: https://agi.topicbox.com/groups/agi/subscription