Lol, I tried it again, and it seems the weighting for layers mostly screws
it over, yet it does its job!! Code is below; see lines 33 - 40, where I
remove the "sames" from the previous layer of counts, but only about half for
each count of 1, and this only affects the weighting for layers - not the
actual count distribution per layer. I really tried to make this work, and it
helps only a little. Maybe later it'll help more, but right now it is only a
0.02MB improvement. If it were 0.1MB, then that would be worth it.

------------------------------------------
Artificial General Intelligence List: AGI
Permalink: 
https://agi.topicbox.com/groups/agi/T3579504be8ceefe6-Ma48a89aae3a7e4f414453ccd
Delivery options: https://agi.topicbox.com/groups/agi/subscription
import math, copy, random
# --- Run configuration and coder state ---------------------------------------
# The whole source text is loaded up front. The name `input` shadows the
# builtin, but the rest of the script reads and extends it under this name, so
# it is kept. The file is now closed as soon as it has been read.
# NOTE(review): 'ansi' is a Windows-only codec alias (mbcs) per the Python
# codecs documentation - confirm before running on another platform.
with open('enwik7.txt', 'r', encoding='ansi') as source_file:
  input = source_file.read()

# 1 = sample each next symbol at random from the prediction instead of coding.
dogenerate = 0
# Decode mode is selected purely by the input file being exactly 14 characters.
dodecode = 1 if len(input) == 14 else 0

# In decode mode the previously written digit stream must be read BEFORE
# comp.txt is reopened below with 'w+', which truncates the file.
decode = ''
if dodecode == 1:
  with open('comp.txt', 'rb') as comp_file:
    # The int() round-trip is kept from the original; note that it drops any
    # leading zeros in the stored digit string.
    decode = str(int(comp_file.read()))

# Output stream for the coded digits; stays open for the rest of the script.
comp = open('comp.txt', 'w+')
# Flat trie: every node occupies three consecutive slots -
# [symbols string, counts list, child-node index list]. Node 0 is the root.
tree = ['', [], []]
# Arithmetic-coder state: `low` is the running interval bound, `middle` the
# current interval width, `de` the read offset into `decode`.
low = 1
de = 0
middle = 1
# Main loop: each iteration predicts one symbol from the trie, codes it with a
# decimal arithmetic coder (or decodes/generates it), then adds the current
# window to the trie.
for count2 in range(100000):
  # 15-character slice starting at the current position. The context lookups
  # below only read window[0..13]; window[-1] is the symbol compared against
  # when encoding.
  window = input[count2: count2 + 15]
  # Slice of up to 280 preceding characters plus the current context. It is
  # assigned here but not read anywhere else in the visible code.
  energy = input[count2 - 280 if count2 > 279 else 0 : count2 + 14]
  # Layer 0 of the prediction stack: a fixed symbol string paired with 200
  # tiny equal counts. Each layer is [symbols string, counts list].
  predictions = [[""" abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789?!<>=":/.-\n_#[]{}*'|&;,()骢 ³â€”~“¡%لا·ˆŠÐ½°Ñ…¸¼¯Œ²¦Ûר›™–ã‚ƒ­ºì•ë˜í¤à¥±—´¹ÅŸæåœä‰Ä«+žá£è‡é箁Âµ¾¬‘¶‹ŽÉÊ\†$šÏÎ	Ú’Ç@ÒÖÌêË^`ÆÔÕÓ""", [0.00001] * 200]]

  # Collect one layer per context length z = 0..14 that exists in the trie.
  # Skipped on the first iteration, when nothing has been added to the trie.
  if count2 != 0:
    for z in range(15):
      node = 0
      for zz in range(15):
        # While zz < z, walk the trie along the z characters window[14 - z .. 13].
        if (14 - z) + zz != 14:
          char_index = tree[node].find(window[(14 - z) + zz]) + 1
          if char_index == 0:
            # This context has not been seen; no layer for this length.
            break
          node = (tree[(node + 2)][char_index - 1])
        else:
          # Whole context matched: take this node's [symbols, counts] pair.
          # Inserting at index 1 leaves layer 0 first and puts the longest
          # matched context right after it. The counts list is the trie's own
          # list object, hence the copies made before any modification below.
          predictions.insert(1, tree[node: node + 2])
          break

  # Mix the layers into a single distribution: predict = [symbols, probabilities].
  predict = ['', []]
  # Probability mass not yet handed out to a layer.
  remaining = 1
  for q in range(len(predictions)):
    # j: this layer's real counts (used for the distribution).
    # h: a second copy whose counts are discounted below (used for weighting only).
    j = [copy.copy(predictions[q][0]), copy.copy(predictions[q][1])]
    h = [copy.copy(predictions[q][0]), copy.copy(predictions[q][1])]

    if q != 0:
      # For every symbol also present in the previously processed layer (oj),
      # subtract 42% of that layer's count from h (h - c + 0.58 * c).
      for f in range(len(oj[0])):
        char_indexx = j[0].find(oj[0][f]) + 1
        if char_indexx != 0:
          h[1][char_indexx - 1] = (h[1][char_indexx - 1] - oj[1][f]) + (0.58 * oj[1][f])
    # Remember this layer's unmodified counts for the next iteration.
    oj = [copy.copy(predictions[q][0]), copy.copy(predictions[q][1])]

    # lj: number of distinct symbols; sum2: total real count; sum3: total
    # discounted count.
    lj, sum2, sum3 = len(j[1]), sum(j[1]), sum(h[1])
    # Raw confidence: discounted total divided by a divisor that grows with the
    # number of distinct symbols (a separate formula is used when lj < 4).
    # NOTE(review): a layer with no symbols (lj == 0) would make this divisor 0.
    w = sum3 / (4.8 * lj * [0.7, 0.99, 0.99][lj - 1] if lj < 4 else lj * 7.3)
    # Map the raw confidence onto fixed weights by threshold; values of 0.4 or
    # below are used unchanged.
    w = 0.87 if w > 20 else 0.9 if w > 10 else 0.79 if w > 3 else 0.67 if w > 1 else 0.55 if w > 0.8 else 0.5 if w > 0.6 else 0.45 if w > 0.4 else w
    # This layer's share of the remaining mass: confidence times a per-layer
    # factor indexed by len(predictions) - 1 - q (index 0 is the last layer in
    # the list).
    _25ofRoof = (w * [0.99, 0.99, 0.93, 0.93, 0.93, 0.8, 0.55, 0.5, 0.28, 0.28, 0.3, 0.33, 0.33, 0.5, 0.99, 0.53][len(predictions) - 1 - q]) * remaining
    remaining -= _25ofRoof

    # Add the layer's normalised counts, scaled by its share, into predict.
    for g in range(lj):
      char_index = predict[0].find(j[0][g]) + 1
      if char_index == 0:
        predict[0] += j[0][g]
        predict[1].append((j[1][g] / sum2) * _25ofRoof)
      else:
        predict[1][char_index - 1] += ((j[1][g] / sum2) * _25ofRoof)
  # Spread whatever mass is left equally over all symbols so the total is 1.
  summ = 1 - sum(predict[1])
  for n in range(len(predict[1])):
    predict[1][n] += summ / len(predict[1])

  # Next 16 digits of the coded stream read as a fraction (0.0 when `decode`
  # is empty, i.e. when not decoding).
  decodepart = float('0.' + str(decode[0 + de:16 + de]))
  # Walk the symbols, moving `low` down by each symbol's slice of the interval
  # until the coded / decoded / generated symbol is reached.
  for m in range(len(predict[0])):
    # In generate mode x is a one-element list from random.choices; otherwise
    # it is the m-th symbol.
    x = random.choices(predict[0], weights=(predict[1]), k=1) if dogenerate == 1 else predict[0][m]
    low -= predict[1][m] * middle
    # Encoding: stop at the symbol that matches the last character of window.
    if dodecode == 0 and x == window[-1]:
      break
    # `and` binds tighter than `or`: (decoding and the stream value lies above
    # low) or generating. The chosen symbol is appended to window and input.
    elif dodecode == 1 and decodepart > low or dogenerate == 1:
      window += x[0]
      input += x[0]
      break
  char_location = 0
  # m keeps the index at which the loop above stopped; high is the upper edge
  # of that symbol's slice.
  high = low + predict[1][m] * middle
  s1 = str(f'{low:.18f}')
  s2 = str(f'{high:.18f}')
  sl = len(str(low))
  # Count the leading characters shared by the two 18-decimal renderings.
  while s1[char_location] == s2[char_location] and char_location != sl:
    char_location += 1
  # Emit the shared digits after "0." except the last shared one, then shift
  # the same number of digits out of low/high, keeping the fractional part.
  # NOTE(review): if fewer than three leading characters match,
  # char_location - 3 is negative (fractional cl, `de` moves backwards) -
  # confirm this is intended.
  comp.write(s1[2: char_location - 1])
  cl = 10 ** (char_location - 3)
  de += char_location - 3
  high = high * cl - math.floor(high * cl)
  low = low * cl - math.floor(low * cl)
  # New interval width; the next iteration again subtracts downward from the
  # upper edge, so low restarts at high.
  middle = high - low
  low = high

  # Learn: add the window to the trie, incrementing counts along existing
  # nodes and creating a new node for each unseen symbol.
  node = 0
  for i in window:
    char_index = tree[node].find(i) + 1
    if char_index == 0:
      # Unseen symbol: record it with count 1, point it at a new node (three
      # fresh slots appended at the end of the flat list), and descend there.
      tree[node] += i
      tree[node + 1].append(1)
      tree[node + 2].append(len(tree))
      node = len(tree)
      tree.extend(('', [], []))
    else:
      tree[node + 1][char_index - 1] += 1
      node = tree[node + 2][char_index - 1]

# Final step. The original packed this into a single tuple expression used
# only for its side effects and never closed either file; it is spelled out
# here with the same order of operations.
comp.seek(0)
if dodecode == 0:
  # Encode mode: report the number of digits written, divided by 2.40819.
  # NOTE(review): presumably converts decimal digits to bytes
  # (8 / log2(10) is about 2.408) - confirm.
  print(len(comp.read()) / 2.40819)
else:
  # Decode mode: write the reconstructed text (input grew during the loop).
  with open('decomp.txt', 'wb') as decomp_file:
    decomp_file.write(bytes(str(input), 'ansi'))
# The coded-digit stream is no longer needed; close it so it is flushed.
comp.close()

Reply via email to