Oh, the code, yeah! Lol, here it is:
from collections import Counter

# Input corpus and the two context strings whose following words are compared.
CORPUS_PATH = 'enwik8.txt'
CONTEXT_1 = ' mother '
CONTEXT_2 = ' follow '

# A following word is cut off after this many characters if no space is seen.
MAX_WORD_LEN = 50
# Words seen more often than this after a context are dropped before comparing.
MAX_COUNT = 50


def count_following_words(text, context, max_word_len=MAX_WORD_LEN):
    """Count the words that immediately follow each occurrence of *context*.

    A "word" is the run of characters after the context up to (not including)
    the next space, truncated to *max_word_len* characters. Only the space
    character ends a word; if the context is directly followed by a space,
    the empty string is counted.

    Unlike the original fixed-window scan, this finds occurrences at the very
    start of *text* and does not index past the end of *text* when a word
    runs up to the end of the input.

    Args:
        text: The text to scan.
        context: The literal string to search for (e.g. ``' mother '``).
        max_word_len: Maximum number of characters kept for a following word.

    Returns:
        A ``Counter`` mapping each following word to its number of occurrences.
    """
    counts = Counter()
    pos = text.find(context)
    while pos != -1:
        start = pos + len(context)
        # Slicing clamps at the end of the text, so no bounds check is needed.
        word = text[start:start + max_word_len].partition(' ')[0]
        counts[word] += 1
        # Advance by one so overlapping occurrences (sharing a space) are seen.
        pos = text.find(context, pos + 1)
    return counts


def drop_frequent(counts, max_count=MAX_COUNT):
    """Return a copy of *counts* without words whose count exceeds *max_count*.

    The original deleted from the lists while indexing them, which tested one
    entry and removed another once anything had been deleted; building a new
    mapping avoids that.
    """
    return Counter({word: n for word, n in counts.items() if n <= max_count})


def overlap_score(counts1, counts2):
    """Score how much the two following-word distributions overlap.

    The mapping with more distinct words is the "bigger" one (ties go to
    *counts2*). The counts of the smaller mapping's words that also occur in
    the bigger one are scaled by ``bigger_total / smaller_total``, summed, and
    divided by the larger of the two totals.

    Args:
        counts1: Word -> count mapping for the first context.
        counts2: Word -> count mapping for the second context.

    Returns:
        The overlap score as a float; ``0.0`` if either mapping has no
        occurrences (the original raised ``ZeroDivisionError`` there).
    """
    if len(counts1) > len(counts2):
        bigger, smaller = counts1, counts2
    else:
        bigger, smaller = counts2, counts1

    bigger_total = sum(bigger.values())
    smaller_total = sum(smaller.values())
    if not bigger_total or not smaller_total:
        return 0.0

    norm = bigger_total / smaller_total
    # The original multiplied and then divided each term by the bigger
    # mapping's count for the word; that factor cancels (up to float
    # rounding), leaving the normalised count from the smaller mapping.
    shared = sum(n * norm for word, n in smaller.items() if word in bigger)
    return shared / max(bigger_total, smaller_total)


def main():
    """Read the corpus and print the overlap score for the two contexts."""
    with open(CORPUS_PATH, 'r', encoding='latin-1') as corpus:
        text = corpus.read()

    counts1 = drop_frequent(count_following_words(text, CONTEXT_1))
    counts2 = drop_frequent(count_following_words(text, CONTEXT_2))
    print(overlap_score(counts1, counts2))


if __name__ == '__main__':
    main()
# get the number of words that follow and the number of types of words that
# follow; confidence = total / (types * 10) + another such, averaged
------------------------------------------
Artificial General Intelligence List: AGI
Permalink:
https://agi.topicbox.com/groups/agi/T01fa5e447808d368-M8ff8e94a4fd468661eae30d3
Delivery options: https://agi.topicbox.com/groups/agi/subscription