
I've updated txt2rgd.py to Python 3, changed the formatting style to
f-strings, and added a comment to the generated file:

      <!-- Generated by txt2rgd.py: YYYY-MM-DD hh:mm -->
      <studio thrufilter="0" recordfilter="0">

The update to Yamaha-PSR220-PSR230-Panel.rgd includes the comment line
and, as per the instructions on the wiki, adds details to the <device>
and <librarian> tags.

I've attached the latest here, for your perusal / testing / inclusion
/ rejection / whatever. ;-)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# txt2rgd:
# DESCR:    Converts from a text-based voice list of an instrument/sound
#           card to the XML-based *.rgd format understood by Rosegarden
# USAGE:    txt2rgd <input file> <output file>
#           where the input file contains the voice list in text format
#           and the output file is the name/location of the *.rgd file
#           to be generated
#           The format of the input file is any reasonable space or comma
#           separated (CSV) list of parameters:
#           a. comments start with '#' and continue to the end of the line
#              and may occur anywhere
#           b. blank lines (or containing whitespace and/or comments only)
#              are ignored
#           c. any line containing data must have 5 fields (+ comments,
#              ignored):
#                  <user marker> <MSB> <LSB> <Prog#> <Voice>  # comment
#              where:
#                  <user marker> is a field used to help data entry,
#                                could be anything, e.g. voice number
#                                but must not contain whitespace, ','
#                                or '#'. This field is ignored in the output
#                  <MSB>         MSB (Most Significant Byte) of the bank
#                                number [range: 0 - 127]
#                  <LSB>         LSB (Least Significant Byte) of the bank
#                                number [range: 0 - 127]
#                  <Prog#>       Program number of the voice within the
#                                current bank [range: 1 - 128]
#                  <Voice>       The name of the voice as desired
#                                it is best to type it in as specified by
#                                the manufacturer or the GM/XG/GS standard
#                                if available
#              MSB and LSB are 0 for the GM only instruments.
#              Given notes 2 & 3 below, some manual intervention on the
#              output may be required to achieve best results.
# NOTES:    1. This script requires Python 3.6 or above (it uses f-strings
#              and the 'timespec' argument of datetime.isoformat)
#           2. The device 'id' and 'name' tags are hardwired
#              (apparently the id must be 0)
#           3. If possible we try to guess a bank name according to XG
#              otherwise the bank name is autogenerated as "<MSB>-<LSB>"
# BUGS:     1. The name of the voice can't contain the ',' and '#'
# WARNING:  1. The output file will be overwritten if it already exists
# VER:      0.4 (Released: 2022.01.02)
# AUTH:     Ryurick M. Hristev <ryurick.hris...@canterbury.ac.nz>
#           Kevin Cole <ubuntour...@hacdc.org>
# LICENSE:  This program is too trivial to license but if you need one
#           pick any from: Public Domain, BSD, LGPL or GPL
#           at your convenience.

import sys
import os
import re
import gzip
from datetime import datetime

# Timestamp embedded in the header comment of the generated file
# (local time, minute resolution, "YYYY-MM-DD hh:mm").
stamp = datetime.today().isoformat(sep=" ", timespec="minutes")

# Exactly two arguments are required.  Stop here when they are missing:
# carrying on would only fail a few lines later with an IndexError on
# sys.argv.
if len(sys.argv) != 3:
    print(f"\n  Usage: {sys.argv[0]} <in_text_filename> <out_rdb_filename>")
    print("\n  See the comments inside the script for detailed usage.\n")
    sys.exit(1)

in_file = sys.argv[1]    # text voice list to read
out_file = sys.argv[2]   # gzip-compressed *.rgd file to (over)write

# markers
line_no = 0        # number of the input line currently being parsed
parse_err_no = 0   # number of input lines rejected so far

# will store the structure in a dict of dicts:
# {MSB: {LSB: {Prog#: Voice}}}
data = {}

# each input data line has: <user_field> MSB LSB Prog <Voice_name>
# (after comment and whitespace stripping)
# NOTE(review): the body of this pattern was missing from this copy of the
# script.  It has been rebuilt from the input format described in the header
# comment and from the group names the parsing loop reads; please verify it
# against the upstream txt2rgd.py before relying on it.
pattern = re.compile(
    r"^(?P<user>[^\s,#]+)[\s,]+"   # user marker: no whitespace, ',' or '#'
    r"(?P<MSB>\d+)[\s,]+"          # bank MSB (range-checked by the parser)
    r"(?P<LSB>\d+)[\s,]+"          # bank LSB (range-checked by the parser)
    r"(?P<Prog>\d+)[\s,]+"         # program number (range-checked later)
    r"(?P<Voice>[^,#]+)$"          # voice name: rest of line, no ',' or '#'
)

# convert to XML friendly output: each key is a character that is special
# in XML, each value is the entity that replaces it in voice names
converter_table = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
}

# generic bank names { MSB : { LSB : Name } }
# Sources: XG Specifications 1.26, XG Guidebook
# The '(?)' means that the docs do not specify an exact naming convention
# so it's my interpretation.
# Naming is very strongly biased towards XG
# From XG Extra Vol.1 No 6
#  - The voices in banks 1 - 8 are essentially similar to their GM equivalent
#    in bank 0, with only minor (though often highly effective) modifications.
#  - The voices in banks 9 - 15 differ from their GM counterparts mainly by
#    having different AEG (Amplifier Envelope Generator) settings.
#  - The voices in banks 16 - 23 differ from their GM counterparts mainly by
#     having different filter settings.
#  - The voices in banks 24 - 31 differ from their GM counterparts mainly by
#    having different FEG (Filter Envelope Generator) settings.
#  - The voices in banks 32 - 39 differ from their GM counterparts mainly by
#    layering two elements and then detuning the elements or changing their
#    pitch relative to one another in various ways.
#  - The voices in banks 40 - 47 differ from their GM counterparts mainly by
#    layering two elements, with the new wave different from the basic one.
#  - Banks 48 - 63 are reserved for future use.
#  - The voices in banks 64 - 72 differ from their GM counterparts in that
#    they use a different wavetable (though they are designed to create a
#    similar kind of sound).
#  - The voices in banks 96 - 101 often have little or no resemblance to
#    their GM counterpart.
#  - Banks 112 - 127 may be used by future XG instruments for the
#    storage of user-created voices.

# Lookup table {MSB: {LSB: bank name}} used to give a bank a readable name;
# banks not listed here are named by the output code from their MSB/LSB.
bank_names = {
    0: {
        0:  "GM",                    # XG Spec
        1:  "KSP",                   # XG Spec
        2:  "KSP 2",                 # XG Guide (?)
        3:  "Stereo",                # XG Spec
        4:  "Stereo 2",              # XG Guide (?)
        5:  "Stereo 3",              # XG Guide (?)
        6:  "Single Element",        # XG Spec/XG Guide
        8:  "Slow Attack",           # XG Spec/XG Guide
        9:  "Fast Attack",           # XG Guide
        10: "Long Release",          # XG Guide
        11: "Short Release",         # XG Guide
        12: "Fast Decay",            # XG Spec
        13: "Slow Decay",            # XG Guide
        14: "Double Attack",         # XG Spec
        16: "Bright",                # XG Spec
        17: "Bright 2",              # XG Spec (?)
        18: "Dark",                  # XG Spec
        19: "Dark 2",                # XG Spec (?)
        20: "Resonant",              # XG Spec
        24: "Attack Transient",      # XG Spec/XG Guide
        25: "Release Transient",     # XG Spec
        26: "Sweep",                 # XG Guide
        27: "Rezo Sweep",            # XG Spec
        28: "Muted",                 # XG Spec
        32: "Detune 1",              # XG Spec
        33: "Detune 2",              # XG Spec
        34: "Detune 3",              # XG Spec
        35: "Octave Layered 1",      # XG Spec/XG Guide
        36: "Octave Layered 2",      # XG Spec/XG Guide
        37: "Fifth Layered 1",       # XG Spec/XG Guide
        38: "Fifth Layered 2",       # XG Spec/XG Guide
        39: "Bend Up/Down",          # XG Spec/XG Guide
        40: "Tutti",                 # XG Spec
        41: "Tutti 2",               # XG Spec (?)
        42: "Tutti 3",               # XG Spec (?)
        43: "Velocity Switch",       # XG Spec
        # was keyed 43 as well, which silently replaced "Velocity Switch"
        44: "Velocity Switch 2",     # XG Guide (?)
        45: "Velocity Crossfade",    # XG Guide
        46: "Velocity Crossfade 2",  # XG Guide (?)
        64: "Other Wave",            # XG Spec
    },
    64:  {0: "SFX"},                 # XG Spec
    127: {0: "XG Rhythm Kits"},      # XG Guide (?)
}

# Python Cookbook pg. 88
def multiple_replace(dict, text):
    """Replace every key of *dict* found in *text* by its value, in one pass.

    All keys are combined into a single regular expression, so text that
    has just been substituted is never scanned again (replacing "&" with
    "&amp;" does not re-escape the new "&").

    Args:
        dict: mapping of literal substrings to their replacements.  The
            parameter shadows the builtin name; it is kept unchanged so
            existing callers are not affected.
        text: the string to process.

    Returns:
        A new string with the replacements applied; *text* itself when the
        mapping is empty.
    """
    if not dict:
        # An empty alternation would match the empty string at every
        # position and the lookup below would raise KeyError.
        return text
    # Try longer keys first so that a key which is a prefix of another one
    # cannot hide it ("ab" must win over "a").
    keys = sorted(dict, key=len, reverse=True)
    regex = re.compile("|".join(map(re.escape, keys)))
    return regex.sub(lambda match: dict[match.group(0)], text)

# Parse the input file line by line: valid lines are stored in `data`,
# invalid ones are reported and counted in `parse_err_no`.
# The with-statement makes sure the input file is closed again.
with open(in_file, "r") as IN_FH:
    for line in IN_FH:
        line_no += 1
        # drop the comment (everything from the first '#') and any
        # surrounding whitespace
        line = line.split("#")[0].strip()
        if not line:
            continue  # blank or comment-only line

        matched_line = pattern.match(line)
        # if there is no match then no group is generated (all or none)
        if matched_line is None:
            print(f"\nInput error at line {line_no}, read: {line}")
            print("Skipping line ... continuing parsing...")
            parse_err_no += 1
            continue

        # the "user" group is only a data entry aid and is not used
        MSB = int(matched_line.group("MSB"))
        LSB = int(matched_line.group("LSB"))
        Prog = int(matched_line.group("Prog"))

        # make the voice name XML friendly
        Voice = multiple_replace(converter_table, matched_line.group("Voice"))

        # Range check on MSB, LSB and Prog#; a line failing a check is
        # counted once and skipped
        if MSB < 0 or MSB > 127:
            print(f"\nInput error at line {line_no}, read: {line}")
            print("Second parameter (MSB) is out of range, "
                  "must be between 0 and 127.")
            print("Skipping line ... continuing parsing...")
            parse_err_no += 1
            continue
        if LSB < 0 or LSB > 127:
            print(f"\nInput error at line {line_no}, read: {line}")
            print("Third parameter (LSB) is out of range, "
                  "must be between 0 and 127.")
            print("Skipping line ... continuing parsing...")
            parse_err_no += 1
            continue
        if Prog < 1 or Prog > 128:
            print(f"\nInput error at line {line_no}, read: {line}")
            print("Fourth parameter is out of range, "
                  "must be between 1 and 128.")
            print("Skipping line ... continuing parsing...")
            parse_err_no += 1
            continue

        # Some confusion here: the program change number starts at 1
        # while the program id starts at 0; subtract 1
        Prog -= 1

        # create embedded dicts as required
        data.setdefault(MSB, {}).setdefault(LSB, {})[Prog] = Voice


# Refuse to generate anything from a partly invalid input file: report the
# number of rejected lines and stop before the output file is opened, so
# that "No output was generated" is actually true.
if parse_err_no != 0:
    print(f"\nThere were {parse_err_no} error(s) in the input file.")
    print("Please correct the error(s) before proceeding forward.")
    print("No output was generated.\n")
    sys.exit(1)

# Write the gzip-compressed device file.  The file is opened in text mode
# with an explicit UTF-8 encoding and "\n" line endings, which yields the
# same bytes as encoding every string by hand before a binary write.
with gzip.open(out_file, "wt", encoding="utf-8", newline="\n") as OUT_FH:

    OUT_FH.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    OUT_FH.write("<!DOCTYPE rosegarden-data>\n")
    OUT_FH.write(f"<!-- Generated by txt2rgd.py: {stamp} -->\n")
    OUT_FH.write('<studio thrufilter="0" recordfilter="0">\n\n')

    # WARNING: id here must be 0!
    OUT_FH.write('<device id="0" name="Unnamed" type="midi">\n\n')

    # placeholder
    OUT_FH.write('  <librarian name="Unknown" email="unknown"/>\n\n')

    # NOTE: one <instrument> element per MIDI channel (ids 2000-2015) used
    # to be written here; these should no longer be necessary, so none are
    # emitted.

    for MSB, banks in data.items():
        for LSB, programs in banks.items():
            # dig out a name if possible, else default to bank name as MSB-LSB
            if MSB in bank_names and LSB in bank_names[MSB]:
                name = bank_names[MSB][LSB]
            else:
                name = f"{MSB:03d}-{LSB:03d}"
            OUT_FH.write(f'  <bank name="{name}" msb="{MSB}" lsb="{LSB}">\n')
            # voice names were already XML-escaped while parsing
            for prog, voice in programs.items():
                OUT_FH.write(f'    <program id="{prog}" name="{voice}"/>\n')
            OUT_FH.write("  </bank>\n\n")

    # close the elements opened above so that the document is well-formed
    OUT_FH.write("</device>\n\n")
    OUT_FH.write("</studio>\n")

