must sleep.....works.....


# Compare how similarly two words are used by looking at the words that
# directly follow them in a corpus: the more follower mass they share, the
# closer the score printed at the end is to 1.0.


def count_followers(text, marker, max_word_len=50):
    """Count the words that directly follow each occurrence of *marker*.

    The "word" after a match is the run of characters up to (not including)
    the next space, capped at *max_word_len* characters.  Only a space ends a
    word, so punctuation and newlines stay part of it, and the word is the
    empty string when the marker is immediately followed by a space.

    Args:
        text: The corpus to scan.  The whole string is scanned (the earlier
            revision stopped after a hard-coded 100,000,000 characters).
        marker: The exact substring to look for, e.g. ``' win '``.
        max_word_len: Maximum number of characters taken for a follower.

    Returns:
        A dict mapping follower word -> number of occurrences, in the order
        the words were first seen.
    """
    counts = {}
    pos = text.find(marker)
    while pos != -1:
        start = pos + len(marker)
        # Slicing never raises, so a marker near the end of the text is safe.
        word = text[start:start + max_word_len].partition(' ')[0]
        counts[word] = counts.get(word, 0) + 1
        # Step by one character, not len(marker): two occurrences may share
        # a space (' win win '), and both must be counted.
        pos = text.find(marker, pos + 1)
    return counts


def drop_frequent(counts, max_count=50):
    """Return a copy of *counts* without words seen more than *max_count* times.

    Builds a new dict instead of deleting while iterating; the index-shifting
    deletion used previously compared one element and removed another.
    """
    return {word: n for word, n in counts.items() if n <= max_count}


def shared_fraction(counts_a, counts_b):
    """Score how much follower mass two count tables have in common.

    The table with the smaller total is scaled up so both totals match; the
    scaled counts of the words present in both tables are summed and divided
    by the larger total.  This works out to the share of the smaller table's
    occurrences that fall on words the other table also contains.

    Args:
        counts_a: dict of word -> count for the first marker.
        counts_b: dict of word -> count for the second marker.

    Returns:
        A float in [0.0, 1.0]; 0.0 when either table is empty (nothing can
        be shared, and it avoids dividing by zero).
    """
    total_a = sum(counts_a.values())
    total_b = sum(counts_b.values())
    if total_a == 0 or total_b == 0:
        return 0.0

    # "Bigger" is decided by total count only, and each word stays attached
    # to its own count, so words and counts can never come from different
    # tables.
    if total_a > total_b:
        bigger, smaller = counts_a, counts_b
        scale = total_a / total_b
    else:
        bigger, smaller = counts_b, counts_a
        scale = total_b / total_a

    shared = 0
    for word, small_n in smaller.items():
        big_n = bigger.get(word)
        if big_n is not None:
            # Algebraically this is just small_n * scale; the original
            # arithmetic is kept so float rounding matches earlier results.
            shared += big_n * ((small_n * scale) / big_n)

    return shared / sum(bigger.values())


def main(path='enwik8.txt'):
    """Print the follower-overlap score of ' win ' and ' won ' in *path*."""
    # latin-1 maps every byte to one character, so decoding cannot fail.
    with open(path, 'r', encoding='latin-1') as handle:
        text = handle.read()

    win_followers = drop_frequent(count_followers(text, ' win '))
    won_followers = drop_frequent(count_followers(text, ' won '))
    print(shared_fraction(win_followers, won_followers))


if __name__ == '__main__':
    main()

# TODO: get the number of words that follow and the number of types of words
# that follow; confidence = total / (types * 10) + another such averaged

------------------------------------------
Artificial General Intelligence List: AGI
Permalink: 
https://agi.topicbox.com/groups/agi/T01fa5e447808d368-M900987a57361d87a494039ad
Delivery options: https://agi.topicbox.com/groups/agi/subscription

Reply via email to