import re

# Matches one row of the wiki-markup table stored in wikipedia_table.txt:
#
#   |-
#   | <name>                 -> group 1: \w+ identifier
#   |<anything>              (line is skipped)
#   | U+<code> (<decimal>)   -> group 2: decimal code point
#   | <anything>             -> group 3: captured but not used for output
#
# Raw string so the backslashes reach the regex engine verbatim instead of
# relying on Python passing unknown string escapes through unchanged.
regex = re.compile(r'\|-\n\| (\w+)\n\|.+\n\| U\+\w+ \((\d+)\)\n\| (.+)\n')

try:
    unichr  # Python 2 builtin
except NameError:
    # Python 3 has no unichr(); chr() already returns a text character.
    unichr = chr

# NOTE(review): on Python 3 the file is decoded with the locale's default
# encoding; the table presumably contains UTF-8 text -- confirm if this is
# ever run in a non-UTF-8 locale.
with open('wikipedia_table.txt') as wiki_table:
    table_text = wiki_table.read()

# Emit one line per matched row, in the form:
#   "<name>", <number of UTF-8 bytes>, { 0xNN, 0xNN, ... }
for ent_name, dec_code, _std in regex.findall(table_text):
    # bytearray yields integers when iterated on both Python 2 and 3, so no
    # ord() call is needed to format each byte.
    utf8_bytes = bytearray(unichr(int(dec_code)).encode('utf-8'))
    hex_bytes = ", ".join("0x%02X" % byte for byte in utf8_bytes)

    # Single parenthesised argument: behaves the same as a print statement on
    # Python 2 and as the print function on Python 3.
    print('"%s", %d, { %s }' % (ent_name, len(utf8_bytes), hex_bytes))