Lol, I tried it again and it seems the weighting for layers mostly screws it
over, yet it does its job!! Code is below; see lines 33 - 40, where I remove
the counts of symbols that also appear in the previous layer of counts, but
only about half per each 1 count, and this only affects the weighting for
layers - not the actual count distribution per layer. I really tried to make
this work and it helps only a little. Maybe later it'll help more. But right
now it is only a 0.02MB improvement. If it were 0.1MB then that would be
worth it.
------------------------------------------
Artificial General Intelligence List: AGI
Permalink:
https://agi.topicbox.com/groups/agi/T3579504be8ceefe6-Ma48a89aae3a7e4f414453ccd
Delivery options: https://agi.topicbox.com/groups/agi/subscription
import math, copy, random
# Online context-mixing text coder: predicts each character from up to 14
# characters of preceding context (counts kept in a trie), blends the
# per-context predictions into one distribution, and arithmetic-codes the
# character as decimal digits written to comp.txt.
#
# The nesting below was reconstructed from a copy of the script that had lost
# all leading whitespace; it is inferred from how the variables are used.
#
# Modes, selected by the two flags:
#   * dodecode == 0   -> encode enwik7.txt into comp.txt and print a size figure.
#   * dodecode == 1   -> chosen automatically when enwik7.txt holds exactly 14
#                        characters; digits are read from comp.txt and the
#                        recovered text is written to decomp.txt.
#   * dogenerate == 1 -> sample the next character from the prediction instead.

# NOTE(review): 'ansi' is an alias of the 'mbcs' codec, which Python only
# provides on Windows; on other platforms this open() raises LookupError.
# It is kept because the alphabet literal below matches text decoded this way.
with open('enwik7.txt', 'r', encoding='ansi') as source_file:
    text = source_file.read()

dogenerate = 0
dodecode = 1 if len(text) == 14 else 0

# The coded digits must be read BEFORE comp.txt is reopened with 'w+' below,
# because that mode truncates the file.
if dodecode == 1:
    with open('comp.txt', 'rb') as coded_file:
        decode = str(int(coded_file.read()))
else:
    decode = ''

# Trie stored as a flat list, three slots per node:
#   tree[n]     - string of the child characters seen at this node
#   tree[n + 1] - list of counts, parallel to the characters
#   tree[n + 2] - list of child node indices, parallel to the characters
tree = ['', [], []]
low = 1      # current upper edge of the coding interval (walked downwards)
de = 0       # offset into `decode` of the digits not yet consumed
middle = 1   # current width of the coding interval

with open('comp.txt', 'w+') as comp:
    for count2 in range(100000):
        # 14 characters of context plus (when encoding) the character to code.
        window = text[count2: count2 + 15]

        # Layer 0 is a fixed alphabet with a tiny uniform count per symbol.
        # NOTE(review): 200 weights are paired with this string by index, so the
        # string must hold at least 200 characters - TODO confirm that no
        # unprintable characters were lost when the script was copied.
        # The `\†` inside the literal is a backslash followed by '†'.
        predictions = [[""" abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789?!<>=":/.-\n_#[]{}*'|&;,()骢 ³â€”~“¡%لا·ˆŠÐ½°Ñ…¸¼¯Œ²¦Ûר›™–ゃºì•ë˜í¤à¥±—´¹ÅŸæåœä‰Ä«+žá£è‡éç®Âµ¾¬‘¶‹ŽÉÊ\†$šÏÎ Ú’Ç@ÒÖÌêË^`ÆÔÕÓ""", [0.00001] * 200]]

        # The trie is empty on the first step, so only look contexts up afterwards.
        if count2 != 0:
            for z in range(15):
                # Walk the last z context characters from the root; when the walk
                # completes (zz == z) the node reached holds the order-z counts.
                node = 0
                for zz in range(15):
                    if (14 - z) + zz != 14:
                        char_index = tree[node].find(window[(14 - z) + zz]) + 1
                        if char_index == 0:
                            break  # this context has never been seen
                        node = (tree[(node + 2)][char_index - 1])
                    else:
                        # Inserting at index 1 keeps layer 0 first and puts the
                        # longest context found directly after it.
                        predictions.insert(1, tree[node: node + 2])
                        break

        # Blend the layers into one distribution: predict = [symbols, probabilities].
        predict = ['', []]
        remaining = 1  # probability mass not yet handed to a layer
        for q in range(len(predictions)):
            j = [copy.copy(predictions[q][0]), copy.copy(predictions[q][1])]
            h = [copy.copy(predictions[q][0]), copy.copy(predictions[q][1])]
            if q != 0:
                # For symbols also present in the previously processed layer,
                # take 42% of that layer's count off h.  Only h's total is used
                # (for the layer weight); the distribution in j is untouched.
                for f in range(len(oj[0])):
                    char_indexx = j[0].find(oj[0][f]) + 1
                    if char_indexx != 0:
                        h[1][char_indexx - 1] = (h[1][char_indexx - 1] - oj[1][f]) + (0.58 * oj[1][f])
            oj = [copy.copy(predictions[q][0]), copy.copy(predictions[q][1])]
            lj, sum2, sum3 = len(j[1]), sum(j[1]), sum(h[1])

            # Layer confidence from adjusted total count versus symbol count,
            # then squashed through hand-tuned thresholds.
            w = sum3 / (4.8 * lj * [0.7, 0.99, 0.99][lj - 1] if lj < 4 else lj * 7.3)
            w = 0.87 if w > 20 else 0.9 if w > 10 else 0.79 if w > 3 else 0.67 if w > 1 else 0.55 if w > 0.8 else 0.5 if w > 0.6 else 0.45 if w > 0.4 else w

            # Share of the remaining mass this layer receives; the table is
            # indexed by the layer's distance from the end of `predictions`.
            _25ofRoof = (w * [0.99, 0.99, 0.93, 0.93, 0.93, 0.8, 0.55, 0.5, 0.28, 0.28, 0.3, 0.33, 0.33, 0.5, 0.99, 0.53][len(predictions) - 1 - q]) * remaining
            remaining -= _25ofRoof

            for g in range(lj):
                char_index = predict[0].find(j[0][g]) + 1
                if char_index == 0:
                    predict[0] += j[0][g]
                    predict[1].append((j[1][g] / sum2) * _25ofRoof)
                else:
                    predict[1][char_index - 1] += ((j[1][g] / sum2) * _25ofRoof)

        # Spread whatever mass is left evenly so the probabilities sum to 1.
        summ = 1 - sum(predict[1])
        for n in range(len(predict[1])):
            predict[1][n] += summ / len(predict[1])

        # Next 16 pending digits as a fraction (0.0 when there is nothing to decode).
        decodepart = float('0.' + str(decode[0 + de:16 + de]))

        # Step down through the interval one symbol at a time until the symbol
        # being encoded is reached, or the decoded fraction lies above `low`.
        for m in range(len(predict[0])):
            x = random.choices(predict[0], weights=(predict[1]), k=1) if dogenerate == 1 else predict[0][m]
            low -= predict[1][m] * middle
            if dodecode == 0 and x == window[-1]:
                break
            elif (dodecode == 1 and decodepart > low) or dogenerate == 1:
                window += x[0]
                text += x[0]
                break

        # Emit the leading digits that the interval's two edges have in common,
        # then rescale the interval by the matching power of ten.
        char_location = 0
        high = low + predict[1][m] * middle
        s1 = f'{low:.18f}'
        s2 = f'{high:.18f}'
        sl = len(str(low))
        # NOTE(review): if s1 and s2 are identical this indexes past their end
        # before the `!= sl` test can stop the loop - confirm it cannot happen.
        while s1[char_location] == s2[char_location] and char_location != sl:
            char_location += 1
        comp.write(s1[2: char_location - 1])
        cl = 10 ** (char_location - 3)
        de += char_location - 3
        high = high * cl - math.floor(high * cl)
        low = low * cl - math.floor(low * cl)
        middle = high - low
        low = high

        # Learn: insert the whole window into the trie, bumping counts on the way.
        node = 0
        for i in window:
            char_index = tree[node].find(i) + 1
            if char_index == 0:
                tree[node] += i
                tree[node + 1].append(1)
                tree[node + 2].append(len(tree))
                node = len(tree)
                tree.extend(('', [], []))
            else:
                tree[node + 1][char_index - 1] += 1
                node = tree[node + 2][char_index - 1]

    comp.seek(0)
    if dodecode == 0:
        # Digits written divided by 2.40819 - presumably decimal digits per byte
        # (about 8 / log2(10)), i.e. an estimated compressed size in bytes.
        print(len(comp.read()) / 2.40819)
    else:
        with open('decomp.txt', 'wb') as decoded_file:
            decoded_file.write(bytes(str(text), 'ansi'))