javacatknight commented on code in PR #36386: URL: https://github.com/apache/superset/pull/36386#discussion_r2583010151
########## docs/src/resources/experimental/parse-md-to-json.py: ########## @@ -0,0 +1,115 @@ +""" +To regenerate JSON from the INTHEWILD.md page. +Replace paths with local files. + +Logos and contributor(s) are optional. +This treats entries like so: + +### Sharing Economy + +- [Airbnb](https://github.com/airbnb) <!--airbnb.png--> +- [Faasos](https://faasos.com/) [@shashanksingh] <!--fassos.svg--> + +And parses it into: + +{ + "categories": { + "Sharing Economy": [ + { + "name": "Airbnb", + "url": "https://github.com/airbnb", + "logo": "", + "contributors": "" + }, + { + "name": "Faasos", + "url": "https://faasos.com/", + "logo": "faasos.svg", + "contributors": "[@shashanksingh]" + }, ... + ] + } +} + +""" +import json +import re + +categories = {} +FILEPATH_IN = "text.md" #CHANGE TO YOUR PATH +FILEPATH_OUT = "new.json" #CHANGE TO YOUR PATH + +### For Parsing: +ID_CAT = "###" # Category Identifier +ID_ENTRY = "-" # Entry Identifier + +def to_json(line): #str -> dict + PATTERN = r'(?:<!--\s*(.*?)\s*-->)|(\[.*?\])|(\(.*?\))' + line = line[2:] + + # Split the string using re.split + # Filter out empty strings and whitespace + result = [p for p in re.split(PATTERN, line) if p and not p.isspace() and p != ''] + print(result) + name = result[0][1:-1] + url = result[1][1:-1] Review Comment: Guaranteed to produce at least two parts - name of organization, website url. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
