Oh, the code, yeah! LOL, here it is:

# Measure how similar the usage contexts of two words are by comparing the
# words that immediately follow " mother " with the words that immediately
# follow " follow " in the corpus, then print a single similarity score.

CORPUS_PATH = 'enwik8.txt'
MAX_WORD_LEN = 50   # a following word is cut off after this many characters
MAX_COUNT = 50      # following words seen more often than this are discarded


def count_followers(text, context, max_word_len=MAX_WORD_LEN):
    """Count the words that directly follow each occurrence of *context*.

    Args:
        text: The corpus to scan.
        context: The literal string to look for (e.g. ``' mother '``).
        max_word_len: Maximum number of characters taken for one word.

    Returns:
        A dict mapping each following word to the number of times it was
        seen, in first-seen order.  A following word is everything up to
        (not including) the next space, truncated to *max_word_len*
        characters; it is empty when *context* is followed by a space or
        by the end of *text*.
    """
    counts = {}
    pos = text.find(context)
    while pos != -1:
        start = pos + len(context)
        end = text.find(' ', start, start + max_word_len)
        if end == -1:
            # No space within reach: take up to max_word_len characters.
            # Slicing clamps at the end of the text, so a match near the end
            # of the corpus cannot raise IndexError.
            end = start + max_word_len
        word = text[start:end]
        counts[word] = counts.get(word, 0) + 1
        # Advance by one character only, so occurrences that share a space
        # (" mother mother ") are all found.
        pos = text.find(context, pos + 1)
    return counts


def drop_frequent(counts, max_count=MAX_COUNT):
    """Return a copy of *counts* without the words seen more than *max_count* times.

    A new dict is built instead of deleting entries while iterating, so the
    count that is tested always belongs to the word that is kept or dropped.
    """
    return {word: n for word, n in counts.items() if n <= max_count}


def similarity(counts1, counts2):
    """Score the overlap between two follower-count tables.

    The table with more distinct words is the "bigger" one (ties go to
    *counts2*).  The count of every word of the smaller table that also
    appears in the bigger one is scaled by ``bigger_total / smaller_total``
    and summed; the sum is divided by the larger of the two totals.

    Returns:
        The score as a float, or ``0.0`` when the smaller table is empty
        (nothing can be shared, and the scale factor would divide by zero).
    """
    if len(counts1) > len(counts2):
        bigger, smaller = counts1, counts2
    else:
        bigger, smaller = counts2, counts1

    bigger_total = sum(bigger.values())
    smaller_total = sum(smaller.values())
    if smaller_total == 0:
        return 0.0

    norm = bigger_total / smaller_total
    # bigger[w] * ((smaller[w] * norm) / bigger[w]) reduces to
    # smaller[w] * norm, so only membership in the bigger table matters.
    shared = sum(n * norm for word, n in smaller.items() if word in bigger)
    return shared / max(bigger_total, smaller_total)


def main():
    """Read the corpus and print the similarity of the two contexts."""
    # latin-1 maps every byte to exactly one character, so decoding cannot fail.
    with open(CORPUS_PATH, 'r', encoding='latin-1') as corpus:
        input2 = corpus.read()

    word1 = drop_frequent(count_followers(input2, ' mother '))
    word2 = drop_frequent(count_followers(input2, ' follow '))
    print(similarity(word1, word2))


if __name__ == '__main__':
    main()

# Get the number of words that follow and the number of types of words that
# follow; confidence = total / (types * 10) + another such, averaged.

------------------------------------------
Artificial General Intelligence List: AGI
Permalink: 
https://agi.topicbox.com/groups/agi/T01fa5e447808d368-M8ff8e94a4fd468661eae30d3
Delivery options: https://agi.topicbox.com/groups/agi/subscription

Reply via email to