|
""" |
|
pygments.lexers._postgres_builtins |
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
|
Self-updating data files for PostgreSQL lexer. |
|
|
|
Run with `python -I` to update itself. |
|
|
|
:copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. |
|
:license: BSD, see LICENSE for details. |
|
""" |
|
|
|
|
|
|
|
# SQL keywords, upper-cased and sorted.
#
# This tuple is regenerated by update_myself() from the PG_KEYWORD entries
# of the file at KEYWORDS_URL, so manual edits are overwritten on the next
# run.  update_consts() locates the definition textually: "KEYWORDS = ("
# and the closing ")" must each stay on a line of their own.
KEYWORDS = (
    'ABORT',
    'ABSOLUTE',
    'ACCESS',
    'ACTION',
    'ADD',
    'ADMIN',
    'AFTER',
    'AGGREGATE',
    'ALL',
    'ALSO',
    'ALTER',
    'ALWAYS',
    'ANALYSE',
    'ANALYZE',
    'AND',
    'ANY',
    'ARRAY',
    'AS',
    'ASC',
    'ASENSITIVE',
    'ASSERTION',
    'ASSIGNMENT',
    'ASYMMETRIC',
    'AT',
    'ATOMIC',
    'ATTACH',
    'ATTRIBUTE',
    'AUTHORIZATION',
    'BACKWARD',
    'BEFORE',
    'BEGIN',
    'BETWEEN',
    'BIGINT',
    'BINARY',
    'BIT',
    'BOOLEAN',
    'BOTH',
    'BREADTH',
    'BY',
    'CACHE',
    'CALL',
    'CALLED',
    'CASCADE',
    'CASCADED',
    'CASE',
    'CAST',
    'CATALOG',
    'CHAIN',
    'CHAR',
    'CHARACTER',
    'CHARACTERISTICS',
    'CHECK',
    'CHECKPOINT',
    'CLASS',
    'CLOSE',
    'CLUSTER',
    'COALESCE',
    'COLLATE',
    'COLLATION',
    'COLUMN',
    'COLUMNS',
    'COMMENT',
    'COMMENTS',
    'COMMIT',
    'COMMITTED',
    'COMPRESSION',
    'CONCURRENTLY',
    'CONFIGURATION',
    'CONFLICT',
    'CONNECTION',
    'CONSTRAINT',
    'CONSTRAINTS',
    'CONTENT',
    'CONTINUE',
    'CONVERSION',
    'COPY',
    'COST',
    'CREATE',
    'CROSS',
    'CSV',
    'CUBE',
    'CURRENT',
    'CURRENT_CATALOG',
    'CURRENT_DATE',
    'CURRENT_ROLE',
    'CURRENT_SCHEMA',
    'CURRENT_TIME',
    'CURRENT_TIMESTAMP',
    'CURRENT_USER',
    'CURSOR',
    'CYCLE',
    'DATA',
    'DATABASE',
    'DAY',
    'DEALLOCATE',
    'DEC',
    'DECIMAL',
    'DECLARE',
    'DEFAULT',
    'DEFAULTS',
    'DEFERRABLE',
    'DEFERRED',
    'DEFINER',
    'DELETE',
    'DELIMITER',
    'DELIMITERS',
    'DEPENDS',
    'DEPTH',
    'DESC',
    'DETACH',
    'DICTIONARY',
    'DISABLE',
    'DISCARD',
    'DISTINCT',
    'DO',
    'DOCUMENT',
    'DOMAIN',
    'DOUBLE',
    'DROP',
    'EACH',
    'ELSE',
    'ENABLE',
    'ENCODING',
    'ENCRYPTED',
    'END',
    'ENUM',
    'ESCAPE',
    'EVENT',
    'EXCEPT',
    'EXCLUDE',
    'EXCLUDING',
    'EXCLUSIVE',
    'EXECUTE',
    'EXISTS',
    'EXPLAIN',
    'EXPRESSION',
    'EXTENSION',
    'EXTERNAL',
    'EXTRACT',
    'FALSE',
    'FAMILY',
    'FETCH',
    'FILTER',
    'FINALIZE',
    'FIRST',
    'FLOAT',
    'FOLLOWING',
    'FOR',
    'FORCE',
    'FOREIGN',
    'FORWARD',
    'FREEZE',
    'FROM',
    'FULL',
    'FUNCTION',
    'FUNCTIONS',
    'GENERATED',
    'GLOBAL',
    'GRANT',
    'GRANTED',
    'GREATEST',
    'GROUP',
    'GROUPING',
    'GROUPS',
    'HANDLER',
    'HAVING',
    'HEADER',
    'HOLD',
    'HOUR',
    'IDENTITY',
    'IF',
    'ILIKE',
    'IMMEDIATE',
    'IMMUTABLE',
    'IMPLICIT',
    'IMPORT',
    'IN',
    'INCLUDE',
    'INCLUDING',
    'INCREMENT',
    'INDEX',
    'INDEXES',
    'INHERIT',
    'INHERITS',
    'INITIALLY',
    'INLINE',
    'INNER',
    'INOUT',
    'INPUT',
    'INSENSITIVE',
    'INSERT',
    'INSTEAD',
    'INT',
    'INTEGER',
    'INTERSECT',
    'INTERVAL',
    'INTO',
    'INVOKER',
    'IS',
    'ISNULL',
    'ISOLATION',
    'JOIN',
    'KEY',
    'LABEL',
    'LANGUAGE',
    'LARGE',
    'LAST',
    'LATERAL',
    'LEADING',
    'LEAKPROOF',
    'LEAST',
    'LEFT',
    'LEVEL',
    'LIKE',
    'LIMIT',
    'LISTEN',
    'LOAD',
    'LOCAL',
    'LOCALTIME',
    'LOCALTIMESTAMP',
    'LOCATION',
    'LOCK',
    'LOCKED',
    'LOGGED',
    'MAPPING',
    'MATCH',
    'MATERIALIZED',
    'MAXVALUE',
    'METHOD',
    'MINUTE',
    'MINVALUE',
    'MODE',
    'MONTH',
    'MOVE',
    'NAME',
    'NAMES',
    'NATIONAL',
    'NATURAL',
    'NCHAR',
    'NEW',
    'NEXT',
    'NFC',
    'NFD',
    'NFKC',
    'NFKD',
    'NO',
    'NONE',
    'NORMALIZE',
    'NORMALIZED',
    'NOT',
    'NOTHING',
    'NOTIFY',
    'NOTNULL',
    'NOWAIT',
    'NULL',
    'NULLIF',
    'NULLS',
    'NUMERIC',
    'OBJECT',
    'OF',
    'OFF',
    'OFFSET',
    'OIDS',
    'OLD',
    'ON',
    'ONLY',
    'OPERATOR',
    'OPTION',
    'OPTIONS',
    'OR',
    'ORDER',
    'ORDINALITY',
    'OTHERS',
    'OUT',
    'OUTER',
    'OVER',
    'OVERLAPS',
    'OVERLAY',
    'OVERRIDING',
    'OWNED',
    'OWNER',
    'PARALLEL',
    'PARSER',
    'PARTIAL',
    'PARTITION',
    'PASSING',
    'PASSWORD',
    'PLACING',
    'PLANS',
    'POLICY',
    'POSITION',
    'PRECEDING',
    'PRECISION',
    'PREPARE',
    'PREPARED',
    'PRESERVE',
    'PRIMARY',
    'PRIOR',
    'PRIVILEGES',
    'PROCEDURAL',
    'PROCEDURE',
    'PROCEDURES',
    'PROGRAM',
    'PUBLICATION',
    'QUOTE',
    'RANGE',
    'READ',
    'REAL',
    'REASSIGN',
    'RECHECK',
    'RECURSIVE',
    'REF',
    'REFERENCES',
    'REFERENCING',
    'REFRESH',
    'REINDEX',
    'RELATIVE',
    'RELEASE',
    'RENAME',
    'REPEATABLE',
    'REPLACE',
    'REPLICA',
    'RESET',
    'RESTART',
    'RESTRICT',
    'RETURN',
    'RETURNING',
    'RETURNS',
    'REVOKE',
    'RIGHT',
    'ROLE',
    'ROLLBACK',
    'ROLLUP',
    'ROUTINE',
    'ROUTINES',
    'ROW',
    'ROWS',
    'RULE',
    'SAVEPOINT',
    'SCHEMA',
    'SCHEMAS',
    'SCROLL',
    'SEARCH',
    'SECOND',
    'SECURITY',
    'SELECT',
    'SEQUENCE',
    'SEQUENCES',
    'SERIALIZABLE',
    'SERVER',
    'SESSION',
    'SESSION_USER',
    'SET',
    'SETOF',
    'SETS',
    'SHARE',
    'SHOW',
    'SIMILAR',
    'SIMPLE',
    'SKIP',
    'SMALLINT',
    'SNAPSHOT',
    'SOME',
    'SQL',
    'STABLE',
    'STANDALONE',
    'START',
    'STATEMENT',
    'STATISTICS',
    'STDIN',
    'STDOUT',
    'STORAGE',
    'STORED',
    'STRICT',
    'STRIP',
    'SUBSCRIPTION',
    'SUBSTRING',
    'SUPPORT',
    'SYMMETRIC',
    'SYSID',
    'SYSTEM',
    'TABLE',
    'TABLES',
    'TABLESAMPLE',
    'TABLESPACE',
    'TEMP',
    'TEMPLATE',
    'TEMPORARY',
    'TEXT',
    'THEN',
    'TIES',
    'TIME',
    'TIMESTAMP',
    'TO',
    'TRAILING',
    'TRANSACTION',
    'TRANSFORM',
    'TREAT',
    'TRIGGER',
    'TRIM',
    'TRUE',
    'TRUNCATE',
    'TRUSTED',
    'TYPE',
    'TYPES',
    'UESCAPE',
    'UNBOUNDED',
    'UNCOMMITTED',
    'UNENCRYPTED',
    'UNION',
    'UNIQUE',
    'UNKNOWN',
    'UNLISTEN',
    'UNLOGGED',
    'UNTIL',
    'UPDATE',
    'USER',
    'USING',
    'VACUUM',
    'VALID',
    'VALIDATE',
    'VALIDATOR',
    'VALUE',
    'VALUES',
    'VARCHAR',
    'VARIADIC',
    'VARYING',
    'VERBOSE',
    'VERSION',
    'VIEW',
    'VIEWS',
    'VOLATILE',
    'WHEN',
    'WHERE',
    'WHITESPACE',
    'WINDOW',
    'WITH',
    'WITHIN',
    'WITHOUT',
    'WORK',
    'WRAPPER',
    'WRITE',
    'XML',
    'XMLATTRIBUTES',
    'XMLCONCAT',
    'XMLELEMENT',
    'XMLEXISTS',
    'XMLFOREST',
    'XMLNAMESPACES',
    'XMLPARSE',
    'XMLPI',
    'XMLROOT',
    'XMLSERIALIZE',
    'XMLTABLE',
    'YEAR',
    'YES',
    'ZONE',
)
|
|
|
# Data type names (and multi-word type fragments such as 'with time zone'),
# sorted.
#
# This tuple is regenerated by update_myself() from the file at
# DATATYPES_URL via parse_datatypes(), so manual edits are overwritten on
# the next run.  update_consts() locates the definition textually:
# "DATATYPES = (" and the closing ")" must each stay on a line of their own.
DATATYPES = (
    'bigint',
    'bigserial',
    'bit',
    'bit varying',
    'bool',
    'boolean',
    'box',
    'bytea',
    'char',
    'character',
    'character varying',
    'cidr',
    'circle',
    'date',
    'decimal',
    'double precision',
    'float4',
    'float8',
    'inet',
    'int',
    'int2',
    'int4',
    'int8',
    'integer',
    'interval',
    'json',
    'jsonb',
    'line',
    'lseg',
    'macaddr',
    'macaddr8',
    'money',
    'numeric',
    'path',
    'pg_lsn',
    'pg_snapshot',
    'point',
    'polygon',
    'real',
    'serial',
    'serial2',
    'serial4',
    'serial8',
    'smallint',
    'smallserial',
    'text',
    'time',
    'timestamp',
    'timestamptz',
    'timetz',
    'tsquery',
    'tsvector',
    'txid_snapshot',
    'uuid',
    'varbit',
    'varchar',
    'with time zone',
    'without time zone',
    'xml',
)
|
|
|
# Pseudo-type names, sorted.
#
# This tuple is regenerated by update_myself() from the pseudo-types table
# of the file at DATATYPES_URL via parse_pseudos(), so manual edits are
# overwritten on the next run.  update_consts() locates the definition
# textually: "PSEUDO_TYPES = (" and the closing ")" must each stay on a line
# of their own.
PSEUDO_TYPES = (
    'any',
    'anyarray',
    'anycompatible',
    'anycompatiblearray',
    'anycompatiblemultirange',
    'anycompatiblenonarray',
    'anycompatiblerange',
    'anyelement',
    'anyenum',
    'anymultirange',
    'anynonarray',
    'anyrange',
    'cstring',
    'event_trigger',
    'fdw_handler',
    'index_am_handler',
    'internal',
    'language_handler',
    'pg_ddl_command',
    'record',
    'table_am_handler',
    'trigger',
    'tsm_handler',
    'unknown',
    'void',
)

# Drop every pseudo-type whose name, compared against the lower-cased
# KEYWORDS, is also a keyword (for example 'trigger'), then re-sort.  This
# statement is not matched by update_consts() because the right-hand side
# does not start with "(", so it survives regeneration.
PSEUDO_TYPES = tuple(sorted(set(PSEUDO_TYPES) - set(map(str.lower, KEYWORDS))))
|
|
|
# Additional PL/pgSQL keywords.  Unlike KEYWORDS, DATATYPES and PSEUDO_TYPES
# this tuple is not rewritten by update_myself(); it is maintained by hand.
PLPGSQL_KEYWORDS = (
    'ALIAS',
    'CONSTANT',
    'DIAGNOSTICS',
    'ELSIF',
    'EXCEPTION',
    'EXIT',
    'FOREACH',
    'GET',
    'LOOP',
    'NOTICE',
    'OPEN',
    'PERFORM',
    'QUERY',
    'RAISE',
    'RETURN',
    'REVERSE',
    'SQLSTATE',
    'WHILE',
)
|
|
|
|
|
|
|
|
|
# Names recognised in EXPLAIN output (presumably plan-node labels -- confirm
# against the lexer that consumes this tuple).  update_myself() does not
# regenerate it, so it is maintained by hand.
#
# NOTE(review): longer names are listed before their prefixes (e.g.
# 'Gather Merge' before 'Gather', 'Hash Join' before 'Hash').  Whether
# consumers depend on that ordering is not visible here; keep it when
# adding entries.
EXPLAIN_KEYWORDS = (
    'Aggregate',
    'Append',
    'Bitmap Heap Scan',
    'Bitmap Index Scan',
    'BitmapAnd',
    'BitmapOr',
    'CTE Scan',
    'Custom Scan',
    'Delete',
    'Foreign Scan',
    'Function Scan',
    'Gather Merge',
    'Gather',
    'Group',
    'GroupAggregate',
    'Hash Join',
    'Hash',
    'HashAggregate',
    'Incremental Sort',
    'Index Only Scan',
    'Index Scan',
    'Insert',
    'Limit',
    'LockRows',
    'Materialize',
    'Memoize',
    'Merge Append',
    'Merge Join',
    'Merge',
    'MixedAggregate',
    'Named Tuplestore Scan',
    'Nested Loop',
    'ProjectSet',
    'Recursive Union',
    'Result',
    'Sample Scan',
    'Seq Scan',
    'SetOp',
    'Sort',
    'SubPlan',
    'Subquery Scan',
    'Table Function Scan',
    'Tid Range Scan',
    'Tid Scan',
    'Unique',
    'Update',
    'Values Scan',
    'WindowAgg',
    'WorkTable Scan',
)
|
|
|
|
|
if __name__ == '__main__': |
|
import re |
|
from urllib.request import urlopen |
|
|
|
from pygments.util import format_lines |
|
|
|
|
|
# Base URL for raw files on the master branch of the postgres/postgres
# repository on GitHub; the two inputs of update_myself() hang off it.
SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
# C header whose PG_KEYWORD(...) entries feed parse_keywords().
KEYWORDS_URL = SOURCE_URL + '/src/include/parser/kwlist.h'
# SGML documentation source scanned by parse_datatypes() and parse_pseudos().
DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
|
|
|
def update_myself():
    """Download the upstream sources and rewrite the constants in this file.

    Fetches ``DATATYPES_URL`` and ``KEYWORDS_URL``, extracts the data type,
    pseudo-type and keyword lists with ``parse_datatypes``,
    ``parse_pseudos`` and ``parse_keywords``, and replaces the
    ``DATATYPES``, ``PSEUDO_TYPES`` and ``KEYWORDS`` definitions in
    ``__file__`` through ``update_consts``.

    Both downloads are decoded as UTF-8 with undecodable bytes ignored.

    Raises:
        ValueError: propagated from the parsers or from ``update_consts``
            when the expected content is missing.
    """
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read.
    with urlopen(DATATYPES_URL) as response:
        content = response.read().decode('utf-8', errors='ignore')
    data_file = content.splitlines()
    datatypes = parse_datatypes(data_file)
    pseudos = parse_pseudos(data_file)

    with urlopen(KEYWORDS_URL) as response:
        content = response.read().decode('utf-8', errors='ignore')
    keywords = parse_keywords(content)

    # All parsing happens before the first write, so a parser failure
    # leaves this file untouched.
    update_consts(__file__, 'DATATYPES', datatypes)
    update_consts(__file__, 'PSEUDO_TYPES', pseudos)
    update_consts(__file__, 'KEYWORDS', keywords)
|
|
|
def parse_keywords(f):
    """Return the sorted, upper-cased keyword names found in the text *f*.

    Every ``PG_KEYWORD("name"`` occurrence in *f* contributes its quoted
    name.

    Raises:
        ValueError: if *f* contains no such occurrence.
    """
    names = sorted(
        name.upper() for name in re.findall(r'PG_KEYWORD\("(.+?)"', f)
    )
    if not names:
        raise ValueError('no keyword found')
    return names
|
|
|
def parse_datatypes(f):
    """Return the sorted data type names listed in the SGML lines *f*.

    *f* is an iterable of text lines.  Lines are scanned until the first
    one containing ``<sect1``; only lines containing ``<entry><type>`` are
    considered.  A line such as::

        time [ (<replaceable>p</replaceable>) ] [ without time zone ]

    yields the names ``"time"`` and ``"without time zone"``.

    Raises:
        ValueError: if no type name is found, so that a changed upstream
            layout cannot silently replace ``DATATYPES`` with an empty
            tuple (matching ``parse_keywords`` and ``parse_pseudos``).
    """
    dt = set()
    for line in f:
        if '<sect1' in line:
            break
        if '<entry><type>' not in line:
            continue

        # Remove the <replaceable> placeholders together with their text,
        # then every remaining tag.
        line = re.sub(r"<replaceable>[^<]+</replaceable>", "", line)
        line = re.sub(r"<[^>]+>", "", line)

        # Split on the square brackets that mark optional parts and drop
        # the pieces that still contain a parenthesis.
        for piece in re.split(r'[\[\]]', line):
            if "(" in piece:
                continue
            for name in piece.split(','):
                # Collapse internal runs of whitespace to single spaces.
                name = " ".join(name.split())
                if name:
                    dt.add(name)

    if not dt:
        raise ValueError('no datatype found')

    return sorted(dt)
|
|
|
def parse_pseudos(f):
    """Return the sorted pseudo-type names from the SGML lines *f*.

    *f* is an iterable of text lines.  The names are taken from
    ``<entry><type>...</type></entry>`` lines located between the
    ``<table id="datatype-pseudotypes-table">`` line and the next
    ``</table>`` line.

    Raises:
        ValueError: if the table start, the table end, or any entry inside
            the table cannot be found.
    """
    start_pat = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
    entry_pat = re.compile(r'\s*<entry><type>(.+?)</type></entry>')
    end_pat = re.compile(r'\s*</table>')

    lines = iter(f)

    # any() stops at the first match, leaving the iterator positioned on
    # the line right after the table opening.
    if not any(start_pat.match(text) for text in lines):
        raise ValueError('pseudo datatypes table not found')

    names = []
    closed = False
    for text in lines:
        hit = entry_pat.match(text)
        if hit:
            names.append(hit.group(1))

        if end_pat.match(text):
            closed = True
            break

    if not closed:
        raise ValueError('end of pseudo datatypes table not found')

    if not names:
        raise ValueError('pseudo datatypes not found')

    return sorted(names)
|
|
|
def update_consts(filename, constname, content):
    """Replace the ``constname = ( ... )`` definition inside *filename*.

    The file is read as UTF-8, the first definition of *constname* is
    swapped for ``format_lines(constname, content)``, and the result is
    written back as UTF-8 with ``\\n`` line endings.

    Raises:
        ValueError: if no matching definition exists; the file is left
            unmodified in that case.
    """
    with open(filename, encoding='utf-8') as source:
        text = source.read()

    # A definition starts at a line ending in "NAME = (" and extends
    # (non-greedily, across lines) to the first line holding only ")".
    found = re.search(
        rf'^{constname}\s*=\s*\($.*?^\s*\)$', text, re.M | re.S
    )
    if found is None:
        raise ValueError(f'Could not find existing definition for {constname}')

    begin, end = found.span()
    replacement = format_lines(constname, content)
    text = ''.join((text[:begin], replacement, text[end:]))

    with open(filename, 'w', encoding='utf-8', newline='\n') as target:
        target.write(text)
|
|
|
# Script entry point: regenerate the DATATYPES, PSEUDO_TYPES and KEYWORDS
# definitions in this file from the upstream sources.
update_myself()
|
|