[Scidvspc-users] twic2scid and pgn2scid

Patrick Nikić Wed, 20 May 2020 13:02:23 -0700

Hi everyone,

let me first introduce myself. My name is Patrick. I'm a long-time user of Scid vs. PC, a junior software engineer and an amateur chess player (FIDE ~2100). I'm not sure whether my message is of interest to any of you, or whether I am perhaps sharing it in the wrong place.

I've been using the twic2scid.py script written for python 2.x for some time. After a recent fresh OS installation, I did not have python 2.x installed by default, so I rewrote the script for python 3.x. If you are interested, you can find it in the attachment.

Meanwhile, I also wrote a pgn2scid.py script for merging a folder of pgn files (or a single pgn file) into a scid database, which you can also find in the attachment.


Best regards,
Patrick

# Append games from a single PGN file or a folder with PGN files to an existing
# Scid database and perform spellchecking

# By Patrick Nikić

# NOTE: Folders are searched recursively, i.e. PGN files in nested subfolders are merged too.

# NOTE: This program comes with absolutely NO WARRANTY.  If anything
# goes wrong, it may delete your database entirely instead of adding
# to it!  I recommend backing up your database, trying it out, and
# then adding a "last week rollback" type of copy command to your
# cronjob, just to make sure.

import glob
import os
import sys
import tempfile

from argparse import ArgumentParser, RawTextHelpFormatter

# Text shown at the top of ``--help``.  Both examples pass the required -f and
# -d options explicitly.
usage = (
    "Example usage:\n"
    "  pgn2scid.py -f ~/recent_pgns_folder -d ~/scidbases/twic -s ~/scidbases/spelling.ssp\n"
    "    --merges all .PGN files from given folder into specified scid database and spelling file.\n"
    "  pgn2scid.py -f ~/tournament.pgn -d ~/scidbases/twic -s ~/scidbases/spelling.ssp -b\n"
    "    --merges the given .PGN file into specified scid database and spelling file. "
    "A backup copy of the database is saved.\n"
)

parser = ArgumentParser(description=usage, formatter_class=RawTextHelpFormatter)
parser.add_argument("-f", "--folder", dest="folder", required=True,
                    help="specify the PGN file to merge or folder containing PGN files for merging")

parser.add_argument("-d", "--database", dest="database", required=True,
                    help="specify the scid database to merge into. Note that this omits the extension .si4 of the database.")

# Optional: when omitted, the default named in the help text is used.
parser.add_argument("-s", "--spelling", dest="spelling", default="spelling.ssp",
                    help="specifies the spelling file for meta corrections. Default value is 'spelling.ssp'.")

parser.add_argument("-b", "--backup", action="store_true", dest="backup",
                    help="stores a backup of the specified database")


options = parser.parse_args()

# Also search /usr/local/bin when external commands are looked up.  Built with
# os.pathsep and a fallback so that an unset or empty PATH does not raise or
# leave an empty PATH entry behind.
os.environ["PATH"] = os.pathsep.join(
    part for part in (os.environ.get("PATH", ""), "/usr/local/bin") if part
)

scid_database = options.database
scid_spelling = options.spelling
backup_flag = options.backup
# Absolute path of the PGN file or folder given with -f.
folder_pgn = os.path.abspath(options.folder)

import shutil
import subprocess


def _run_tool(*argv):
    """Run an external command without a shell; return True if it exited with 0.

    The arguments are passed as a list, so paths containing spaces or shell
    metacharacters reach the command unchanged.  An executable that cannot be
    started is reported and treated as a failure.
    """
    try:
        return subprocess.run(argv).returncode == 0
    except OSError as err:
        print(f"Could not run {argv[0]}: {err}")
        return False


def _remove_db_files(db_name):
    """Delete every file matching ``<db_name>.s*``."""
    for path in glob.glob(f"{glob.escape(db_name)}.s*"):
        os.unlink(path)


# Work out which PGN files to merge before any temporary file is created.
from_folder = os.path.isdir(folder_pgn)
if from_folder:
    print(f"Concatenating all the PGN(s) from {folder_pgn}...")
    # Walks nested folders too; the extension is matched case-insensitively
    # so that both "games.pgn" and "GAMES.PGN" are picked up.
    pgn_sources = [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(folder_pgn)
        for name in filenames
        if name.lower().endswith(".pgn")
    ]
elif os.path.isfile(folder_pgn):
    pgn_sources = [folder_pgn]
else:
    print("Neither a file nor a folder was given!")
    sys.exit(1)

# Concatenate all sources into one temporary PGN file and convert it into the
# temporary database "<database>.new".  The temporary PGN file is removed even
# if copying or converting raises.
pgn_fd, pgn_path = tempfile.mkstemp(".pgn")
try:
    with open(pgn_fd, "wb") as all_pgns:
        for source in pgn_sources:
            if from_folder:
                print(f"Appending {source}...")
            with open(source, "rb") as one_pgn:
                # Streams in chunks instead of loading the whole file.
                shutil.copyfileobj(one_pgn, all_pgns)
            # Keeps the next file's first line from being glued to this
            # file's last line when it lacks a trailing newline.
            all_pgns.write(b"\n")

    print("Converting PGN file to a temporary scid database...")
    converted = _run_tool("pgnscid", "-f", pgn_path, f"{scid_database}.new")
finally:
    os.unlink(pgn_path)

if not converted:
    _remove_db_files(f"{scid_database}.new")
    print("pgnscid failed; the database was left untouched.")
    sys.exit(1)

# Copy the current database to "<database>.old".  The original files are only
# deleted once this copy is known to have succeeded.
if not _run_tool("scmerge", f"{scid_database}.old", scid_database):
    _remove_db_files(f"{scid_database}.new")
    print(f"Could not copy {scid_database} to {scid_database}.old; "
          "the database was left untouched.")
    sys.exit(1)
_remove_db_files(scid_database)

print(f"Merging all games with {scid_database}...")
if not _run_tool("scmerge", scid_database, f"{scid_database}.new", f"{scid_database}.old"):
    # Keep both inputs so that nothing is lost and the merge can be redone.
    print(f"Merge failed; the previous games are still in {scid_database}.old "
          f"and the new games in {scid_database}.new.")
    sys.exit(1)
_remove_db_files(f"{scid_database}.new")

if backup_flag:
    print(f"Keeping backup of specified database as {scid_database}.old...")
else:
    _remove_db_files(f"{scid_database}.old")

print("Spell checking the new database...")
if not _run_tool("sc_spell", scid_database, scid_spelling):
    print("Spell checking did not complete successfully.")
    sys.exit(1)

# Download the current week's TWIC games and append them to an existing
# Scid database and perform spellchecking.

# Original script written by John Wiegley

# Modifications by Maksim Grinman:
# 3/20/2013: Updated to work with the new TWIC site at http://www.theweekinchess.com/twic
# 3/23/2013: Updated to support optional flags -a and -n.

# Modifications by Patrick Nikić:
# 5/20/2020: Port script to python3

# NOTE: This program comes with absolutely NO WARRANTY.  If anything
# goes wrong, it may delete your database entirely instead of adding
# to it!  I recommend backing up your database, trying it out, and
# then adding a "last week rollback" type of copy command to your
# cronjob, just to make sure.

import glob
import os
import re
import shutil
import sys
import tempfile
import urllib.request
import zipfile

from argparse import ArgumentParser, RawTextHelpFormatter

# Text shown at the top of ``--help``.
usage = (
    "Example usage:\n"
    "  twic2scid.py -n 3 -d ~/scidbases/twic -s ~/scidbases/spelling.ssp\n"
    "    --merges latest 3 pgns into specified scid database and spelling file.\n"
    "  twic2scid.py -a\n"
    "    --merges all pgns available into the default database with the default spelling file.\n"
    "  twic2scid.py --latestn=5 --spelling=another_spelling.ssp\n"
    "    --merges latest 5 pgns into the default database 'twic.si4' in current directory, and uses "
    "spelling file 'another_spelling.ssp' in current directory."
)

parser = ArgumentParser(description=usage, formatter_class=RawTextHelpFormatter)
parser.add_argument("-a", "--all", action="store_true", dest="all",
                    help="gets all pgn archives on the page. Overrides -n if specified.")

parser.add_argument("-n", "--latestn", type=int, dest="latestn",
                    help="gets LATESTN archives. LATESTN must be an integer. If LATESTN is greater than the number of pgn archives found on the twic page, this is equivalent to --all. If LATESTN is zero, this option is ignored.")

parser.add_argument("-d", "--database", dest="database", default="twic",
                    help="specify the scid database to merge into. Default value is 'twic'. Note that this omits the extension .si4 of the database.")

parser.add_argument("-s", "--spelling", dest="spelling", default="spelling.ssp",
                    help="specifies the spelling file for meta corrections. Default value is 'spelling.ssp'.")


options = parser.parse_args()

# Also search /usr/local/bin when external commands are looked up.  Built with
# os.pathsep and a fallback so that an unset or empty PATH does not raise or
# leave an empty PATH entry behind.
os.environ["PATH"] = os.pathsep.join(
    part for part in (os.environ.get("PATH", ""), "/usr/local/bin") if part
)

# Normalise -n: --all, a missing -n and "-n 0" all mean "no explicit count"
# (None); a negative count is treated as its absolute value.
if options.all or not options.latestn:
    options.latestn = None
else:
    options.latestn = abs(options.latestn)

scid_database = options.database
scid_spelling = options.spelling

print("Downloading the Week in Chess main page...")

# list of pgn links found
pgn_links = []

# How many archives to collect: no limit with --all, otherwise LATESTN, or
# only the first one found when neither option was given.
if options.all:
    wanted = None
else:
    wanted = options.latestn or 1

# First https:// URL on a line, up to (not including) the next " symbol.
link_pattern = re.compile(r'https://[^"]+')

with urllib.request.urlopen("https://www.theweekinchess.com/twic") as url:
    for raw_line in url:
        # A stray undecodable byte should not abort the whole scan.
        line = raw_line.decode(errors="replace")

        # Choose the zip that contains PGN (not CBV, Text or else)
        if ">PGN<" not in line:
            continue

        match = link_pattern.search(line)
        if not match:
            continue

        pgn_links.append(match.group(0))
        if wanted is not None and len(pgn_links) >= wanted:
            break

# Number of PGN links collected.
found = len(pgn_links)

if not found:
    print("Could not find PGN zipfile in twic.html!")
    sys.exit(1)

import subprocess

# lftp is preferred in the following code, since it does all the retrying and status display

print(f"Getting PGN archives {pgn_links}")

# will hold scid databases to be merged
databases = []

# will hold zips downloaded, each zip should contain a pgn file
pgn_zips = []

# will hold temporary zip files to be cleaned up after use
containers = []

for link in pgn_links:
    zip_fd, zip_path = tempfile.mkstemp(".zip")
    # Every downloader below writes to the path itself, so the descriptor
    # returned by mkstemp is closed right away instead of being leaked.
    os.close(zip_fd)
    containers.append(zip_path)

    # External downloaders to try, in order of preference.  Each is looked up
    # on PATH and run without a shell.
    downloaders = []
    if shutil.which("lftp"):
        # mkstemp already created the target file, so lftp is told that it
        # may overwrite it.
        downloaders.append(
            ["lftp", "-c", f"set xfer:clobber on; get {link} -o {zip_path}; quit"]
        )
    if shutil.which("wget"):
        downloaders.append(["wget", "-O", zip_path, link])

    # any() stops at the first downloader that exits successfully.
    downloaded = any(
        subprocess.run(command).returncode == 0 for command in downloaders
    )

    if not downloaded:
        print("lftp or wget not working, retrying directly...")
        # Opening with "wb" discards whatever a failed downloader left behind.
        with open(zip_path, "wb") as f, urllib.request.urlopen(link) as response:
            shutil.copyfileobj(response, f)

    pgn_zips.append(zipfile.ZipFile(zip_path))

import subprocess

print("Unzipping and converting to scid databases...")
for pgn_zip in pgn_zips:
    # The archive is closed when this block ends, even if extraction raises.
    with pgn_zip:
        for elem in pgn_zip.namelist():
            if not elem.endswith(".pgn"):
                continue

            # Extract the PGN member to a temporary file that pgnscid can read.
            pgn_fd, pgn_path = tempfile.mkstemp(".pgn")
            try:
                with open(pgn_fd, "wb") as pgn:
                    pgn.write(pgn_zip.read(elem))

                # mkstemp is used only to reserve a unique base name for the
                # temporary database; its descriptor is not needed.
                db_fd, db_path = tempfile.mkstemp()
                os.close(db_fd)

                try:
                    converted = subprocess.run(
                        ["pgnscid", "-f", pgn_path, db_path]
                    ).returncode == 0
                except OSError as err:
                    print(f"Could not run pgnscid: {err}")
                    converted = False
            finally:
                os.unlink(pgn_path)

            if converted:
                databases.append(db_path)
            else:
                # A failed conversion must not be queued for merging; remove
                # whatever files were created under its base name.
                print(f"pgnscid could not convert {elem}; skipping it.")
                for leftover in glob.glob(f"{glob.escape(db_path)}*"):
                    os.unlink(leftover)

# The downloaded zip files are no longer needed.
for container in containers:
    os.unlink(container)

import subprocess


def _run_tool(*argv):
    """Run an external command without a shell; return True if it exited with 0.

    The arguments are passed as a list, so paths containing spaces or shell
    metacharacters reach the command unchanged.  An executable that cannot be
    started is reported and treated as a failure.
    """
    try:
        return subprocess.run(argv).returncode == 0
    except OSError as err:
        print(f"Could not run {argv[0]}: {err}")
        return False


def _remove_db_files(db_name):
    """Delete every file matching ``<db_name>.s*``."""
    for path in glob.glob(f"{glob.escape(db_name)}.s*"):
        os.unlink(path)


print(f"Merging databases into {scid_database}.new...")
if databases:
    merged = _run_tool("scmerge", f"{scid_database}.new", scid_database, *databases)

    # The per-archive temporary databases are no longer needed either way.
    for db in databases:
        for leftover in glob.glob(f"{glob.escape(db)}*"):
            os.unlink(leftover)

    if not merged:
        _remove_db_files(f"{scid_database}.new")
        print(f"scmerge failed; {scid_database} was left untouched.")
        sys.exit(1)

    print(f"Moving new database to {scid_database}...")
    _remove_db_files(scid_database)
    if not _run_tool("scmerge", scid_database, f"{scid_database}.new"):
        # The original files are already gone at this point, so the merged
        # copy must be kept instead of being deleted.
        print(f"Could not move {scid_database}.new to {scid_database}; "
              f"all games are kept in {scid_database}.new.")
        sys.exit(1)
    _remove_db_files(f"{scid_database}.new")

    print("Spell checking the new database...")
    if not _run_tool("sc_spell", scid_database, scid_spelling):
        print("Spell checking did not complete successfully.")
        sys.exit(1)
else:
    print("No PGN files were converted; nothing to merge.")

_______________________________________________
Scidvspc-users mailing list
Scidvspc-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/scidvspc-users

[Scidvspc-users] twic2scid and pgn2scid

Reply via email to