and the new code:

# Read the whole corpus into memory as one string.  latin-1 maps every byte
# to a character, so decoding cannot fail whatever the file really contains.
# The with-block closes the file handle as soon as the text has been read.
with open('lc.txt', 'r', encoding='latin-1') as corpus_file:
  input2 = corpus_file.read()

# Parallel lists filled by the scan below: prewordNwords[i] is a word seen
# directly before or after search word N, and prewordNcounts[i] is how many
# times it was seen there.
preword1words = []
preword2words = []
preword1counts = []
preword2counts = []

# Characters that count as a word boundary on either side of a search word.
DELIMITERS = frozenset(' ,.[]():;\n\t\r')
# The scan is defined over a sliding slice of this many characters; a search
# word is examined when it sits at the right-hand end of that slice.
WINDOW = 12
# Upper bound on the number of slice start positions that are examined.
MAX_POSITIONS = 1000000000
# Longest following word collected (characters after the separating space).
MAX_FOLLOWING = 49


def _word_after(text, pos):
  """Return the word that follows the single space at text[pos].

  Returns '' when text[pos] is not a space.  Collection stops at the first
  delimiter, after MAX_FOLLOWING characters, or at the end of the text.
  """
  if text[pos] != ' ':
    return ''
  chars = []
  for ch in text[pos + 1:pos + 1 + MAX_FOLLOWING]:
    if ch in DELIMITERS:
      break
    chars.append(ch)
  return ''.join(chars)


def _word_before(text, pos, limit):
  """Return the word that ends just before the single space at text[pos].

  Returns '' when text[pos] is not a space.  Reading goes leftwards and stops
  at the first delimiter, after `limit` characters, or at the text start, so
  a longer word is cut down to its last `limit` characters.
  """
  if text[pos] != ' ':
    return ''
  begin = pos
  while (begin > 0 and pos - begin < limit
         and text[begin - 1] not in DELIMITERS):
    begin -= 1
  return text[begin:pos]


def _tally(word, words, counts, slots):
  """Add one sighting of `word` to the parallel lists `words` / `counts`.

  `slots` maps each word to its index in the two lists so that a repeat
  sighting is found without scanning the list.  Empty words are ignored.
  """
  if not word:
    return
  slot = slots.get(word)
  if slot is None:
    slots[word] = len(words)
    words.append(word)
    counts.append(1)
  else:
    counts[slot] += 1


def _collect_context(text, searchfor, words, counts):
  """Tally the neighbours of every delimited occurrence of `searchfor`.

  An occurrence counts when the characters directly before and directly
  after it are both in DELIMITERS.  For each one, the following word and
  then the preceding word are added to `words` / `counts` (in place).
  Occurrences that start before index WINDOW - len(searchfor), or that have
  no character after them, are skipped; the latter used to raise IndexError.

  Returns the number of occurrences that were counted.
  """
  slots = {known: slot for slot, known in enumerate(words)}
  size = len(searchfor)
  # NOTE(review): the preceding word is only read from inside the WINDOW
  # slice, so it is cut to its last 3 characters for 'computer' and 6 for
  # 'table'.  Kept exactly as before - confirm whether that is intended.
  lookback = max(0, WINDOW - size - 1)
  first_start = max(1, WINDOW - size)
  # text[end] must exist, and the slice start (end - WINDOW) must stay below
  # MAX_POSITIONS.
  last_end = min(len(text) - 1, MAX_POSITIONS - 1 + WINDOW)
  seen = 0
  start = text.find(searchfor, first_start)
  while start != -1 and start + size <= last_end:
    end = start + size
    if text[start - 1] in DELIMITERS and text[end] in DELIMITERS:
      seen += 1
      _tally(_word_after(text, end), words, counts, slots)
      _tally(_word_before(text, start - 1, lookback), words, counts, slots)
    # Step by one character so that no start position is skipped.
    start = text.find(searchfor, start + 1)
  return seen


betterbe10timesseenatleast1 = _collect_context(
    input2, 'computer', preword1words, preword1counts)
betterbe10timesseenatleast2 = _collect_context(
    input2, 'table', preword2words, preword2counts)

if betterbe10timesseenatleast1 < 10 or betterbe10timesseenatleast2 < 10:
  print('should use words that both have a minimum of 10 counts at least')




# Clamp every context-word count to at most 180.  Slice assignment rewrites
# each list in place, so the existing list objects are kept.
for tallies in (preword1counts, preword2counts):
  tallies[:] = [min(tally, 180) for tally in tallies]



# Character trie stored in one flat list.  A node lives at an even index n:
#   tree[n]     - string holding the node's child characters, in the order
#                 they were first seen
#   tree[n + 1] - list whose first len(tree[n]) entries are the per-child
#                 counts and whose remaining len(tree[n]) entries are the
#                 indices of the child nodes, in the same order
# The root is node 0.
tree = ['', []]
# Stop at the end of the text: past it every window is empty and the inner
# loop does nothing, so those iterations were pure overhead.
for count2 in range(min(10000000, len(input2))):
  window = input2[count2: count2 + 15]
  node = 0
  for ch in window:
    slot = tree[node].find(ch)
    if slot == -1:
      # Unseen character at this node: add it with count 1, give it a fresh
      # empty child node at the end of the list, and move on to the next
      # window - each window adds at most one new node.
      tree[node] += ch
      tree[node + 1].insert(len(tree[node]) - 1, 1)
      tree[node + 1].append(len(tree))
      tree.extend(('', []))
      break
    # Known character: bump its count and descend into its child node.
    tree[node + 1][slot] += 1
    node = tree[node + 1][len(tree[node]) + slot]

# A context word is kept only when ' word ' has a trie count below this.
RARE_LIMIT = 270


def _tree_count(trie, sequence):
  """Return the count stored on the last edge of `sequence` in `trie`.

  `trie` is the flat list built above: an even index n holds a node's child
  characters as a string, and n + 1 holds their counts followed by their
  child node indices.  Returns 0 as soon as a character is missing, instead
  of letting str.find's -1 be used as a list index.
  """
  node = 0
  count = 0
  for ch in sequence:
    slot = trie[node].find(ch)
    if slot == -1:
      return 0
    count = trie[node + 1][slot]
    node = trie[node + 1][len(trie[node]) + slot]
  return count


def _keep_rare(words, counts):
  """Return the (words, counts) pairs whose ' word ' count is below RARE_LIMIT.

  Words whose path is absent from the trie have a count of 0 and are kept.
  """
  kept_words = []
  kept_counts = []
  for word, count in zip(words, counts):
    if _tree_count(tree, ' ' + word + ' ') < RARE_LIMIT:
      kept_words.append(word)
      kept_counts.append(count)
  return kept_words, kept_counts


word1words, word1counts = _keep_rare(preword1words, preword1counts)
word2words, word2counts = _keep_rare(preword2words, preword2counts)



# Score how much the two context-word distributions overlap: scale the
# smaller distribution up to the total of the larger one, add up the smaller
# of the two counts for every word both share, and divide by that total.
wordsum1 = sum(word1counts)
wordsum2 = sum(word2counts)
biggersum = max(wordsum1, wordsum2)
shared = 0
if wordsum1 and wordsum2:
  if wordsum1 > wordsum2:
    normcreator = wordsum1 / wordsum2
    word2counts[:] = [count * normcreator for count in word2counts]
  else:
    normcreator = wordsum2 / wordsum1
    word1counts[:] = [count * normcreator for count in word1counts]

  # Look shared words up by name instead of comparing every pair.
  counts2_by_word = dict(zip(word2words, word2counts))
  for word, count in zip(word1words, word1counts):
    if word in counts2_by_word:
      # Same value as bigger * (smaller / bigger), without the rounding.
      shared += min(count, counts2_by_word[word])
  print(shared / biggersum)
else:
  # One side has no context words at all, so nothing can be shared; this
  # used to end in a ZeroDivisionError.
  print(0.0)

# get the number of words that follow and the number of types of words that
# follow; confidence = total / (types * 10) + another such, averaged

------------------------------------------
Artificial General Intelligence List: AGI
Permalink: 
https://agi.topicbox.com/groups/agi/T01fa5e447808d368-M3763f7b3473400334eae7e7c
Delivery options: https://agi.topicbox.com/groups/agi/subscription

Reply via email to