Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/sql/87.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Sql 使用Antlr4的奇怪输入不匹配_Sql_Parsing_Antlr - Fatal编程技术网

Sql 使用Antlr4的奇怪输入不匹配

Sql 使用Antlr4的奇怪输入不匹配,sql,parsing,antlr,Sql,Parsing,Antlr,我目前正在尝试实现一个能够解析DB2CREATETABLE语句的DB2解析器。因此,我使用了正式的IBM语法 这是我到目前为止的语法: grammar db2; list: sql_expression; sql_expression: (create_statement SEMICOLON)+; /* ---------------------------------------------------------------------------------------

我目前正在尝试实现一个能够解析 DB2 CREATE TABLE 语句的 DB2 解析器。为此，我参照了 IBM 官方的语法定义。

这是我到目前为止的语法:

/* Combined grammar (parser and lexer rules in one file) named db2. */
grammar db2; 

/* Entry rule: a list is exactly one sql_expression. */
list: sql_expression;

/* One or more CREATE statements, each terminated by a SEMICOLON token. */
sql_expression: 
    (create_statement SEMICOLON)+;


/* ------------------------------------------------------------------------------------------------
                                                TABLES MAIN
-------------------------------------------------------------------------------------------------*/

/* CREATE TABLE statement. The table name and the main block are required; the
   remaining clauses are each optional but must appear in the order written. */
create_statement: 
'CREATE' 'TABLE' tableName=table_name tableMainBlock=table_main_block (initialLoggin=initial_loggin)? (volatilityLabel=volatility)? (rcdfmtLabel=rcdfmt)? (clauseLabel=clause)? (inTS=in_ts)?  #createTable
;

/* IN clause: the keyword IN followed by a NAME wrapped in double-quote tokens.
   The label tsName presumably stands for a tablespace name (TODO confirm). */
in_ts:
'IN' '"' tsName=NAME '"'
;

/* Body of the statement: a parenthesised element list, a LIKE clause, or an
   AS-subquery clause. */
table_main_block:
    LEFT_PAREN tableParens=table_parens  RIGHT_PAREN    #tableMainParens
    | tableLike=table_like  #createTableLike
    | asSubqueryClause=as_subquery_clause   #tableAsSubqueryClause  
;

/* One or more table elements. No separator token is required between the
   elements at this level; COMMA appears only inside column_definitions. */
table_parens: 
    ((column_defs=column_definitions)| (tableLike=table_like) | (tableUnique=table_unique) | (tableRef=table_ref) | (tableCheck=table_check))+  #tableParens
;

/* Matches only the single literal keyword below; presumably a placeholder for
   the real subquery syntax (TODO confirm). */
as_subquery_clause:
    'TABLESUBQUERYCLAUSE'
;
/* Table name with an optional double-quoted prefix (labelled dbName) followed
   by a dot. The first alternative takes a double-quoted table name, the second
   an unquoted one. */
table_name : ('"' dbName=NAME '"' '.')? '"'tableName=NAME'"' #tableNameBunnys
| ('"'dbName=NAME'"''.') ?tableName=NAME    #tableName
;

/* Name of the table referenced by LIKE, either double-quoted or bare. */
like_table_name : '"'tableName=NAME'"' #likeTableNameBunnys
| tableName=NAME    #likeTableName
;

/* LIKE clause. Both alternatives start with LIKE followed by a (possibly
   quoted) NAME; the view form additionally requires copy_options. */
table_like:
'LIKE' likeTableName=like_table_name    #tableLikeTable
| 'LIKE' likeViewName=like_view_name copyOptions=copy_options #tableLikeView
;

/* Name of the view referenced by LIKE, either double-quoted or bare. */
like_view_name:
    '"'viewName=NAME'"' #viewNameBunnys
| viewName=NAME #viewName
;

/* Matches one literal token spelling NOT LOGGED INITIALLY. Because the three
   words form a single literal, the input must separate them with exactly one
   space each. */
initial_loggin:
'NOT LOGGED INITIALLY'  #initialLogging
;

/* VOLATILE or NOT VOLATILE (the latter as one literal token), each optionally
   followed by cardinality_volatility. */
volatility:
'NOT VOLATILE'  #nVolatile
|'NOT VOLATILE' cardinality_volatility  #NVolatileCard
|'VOLATILE' #Volatile
|'VOLATILE' cardinality_volatility  #VolatileCard
;

/* Matches only the literal CARDVOL; presumably a placeholder (TODO confirm). */
cardinality_volatility:
'CARDVOL'
;

/* RCDFMT keyword followed by a format name. */
rcdfmt:
'RCDFMT' rcdfmt_format_name #rcdfmtFormatName
;

/* Matches only the literal RCDFMTNAME; presumably a placeholder (TODO confirm). */
rcdfmt_format_name:
'RCDFMTNAME'
;

/* Either a distribution clause or a partitioning clause. */
clause: 
distribution_clause
| partitioning_clause
;

/* Matches only the literal DISTRIBUTIONCLAUSE; presumably a placeholder
   (TODO confirm). */
distribution_clause:
'DISTRIBUTIONCLAUSE'    #distributionClause
;

/* Matches only the literal PARTITIONINGCLAUSE; presumably a placeholder
   (TODO confirm). */
partitioning_clause:
'PARTITIONINGCLAUSE'    #partitioningClause
;

/* Matches only the exact literal below, including its three trailing dots;
   presumably a placeholder for the real copy options (TODO confirm). */
copy_options:
    'EXCLUDING IDENTITY...' #copyOptionsExclude
;

/* Matches only the literal UNIQUECONSTRAINT; presumably a placeholder
   (TODO confirm). */
table_unique:
'UNIQUECONSTRAINT'
;

/* Either the single literal token FOREIGN KEY (the two words joined by exactly
   one space) or the keyword CONSTRAINT followed by a constraint name. */
table_ref:
'FOREIGN KEY'   #foreignKeyRef
| 'CONSTRAINT' constraint_name #constraintRef
;

/* A constraint name is a single NAME token. */
constraint_name:
NAME
;

/* Matches only the literal CHECKCONSTRAINT; presumably a placeholder
   (TODO confirm). */
table_check:
'CHECKCONSTRAINT'   
;


/* -------------------------------------------------------------------------------------------------------
                                                 COLUMNS                            
--------------------------------------------------------------------------------------------------------- */

/* A comma-separated list of one or more column definitions. */
column_definitions:
    column (COMMA column)*  #colDefs 
    ;

/* One column: name, optional FOR COLUMN clause, data type, then any number of
   column options. */
column:
    colName=col_name (forColumn=col_for_column)? colType=col_type (colOptions=col_options)*  #col 
    ;   

/* Column name, either double-quoted or bare. */
col_name: '"' colName=NAME '"'  #colNameBunnys
    |   colName=NAME    #colName;   

/* FOR, an optional COLUMN keyword, then the system column name. */
col_for_column:
    'FOR' ('COLUMN')? systemColumnName=col_system_column_name   #colForSysColName
;

/* Matches only the literal COLSYSTEMCOLUMNNAME; presumably a placeholder
   (TODO confirm). */
col_system_column_name:
    'COLSYSTEMCOLUMNNAME'
;

/* A single column option. NOT NULL, GENERATED ALWAYS and GENERATED BY DEFAULT
   are each one literal token, so their words must be separated by exactly one
   space in the input.
   NOTE(review): the NOT NULL alternative carries the label colUnique, which
   looks like a naming slip; confirm before relying on the generated
   context name. */
col_options: 
    'NOT NULL' #colUnique
    | col_default_clause    #colDefaultClause
    | ('GENERATED ALWAYS'|'GENERATED BY DEFAULT') (IdentityOptions=col_options_identity_options)? #colGenAlwaysDefault
    | col_datalink_options  #colDataLinkOptions
    | col_constraint    #colConstraint
;

/* DEFAULT clause with an optional leading WITH. The default value may be
   absent, a NUMBER, a NAME, NUMBER DOT NUMBER, one of the listed special
   keywords, or a cast function applied to one such value.
   NOTE(review): the lexer also defines FNUMBER for digits-dot-digits, so input
   such as 1.5 will presumably arrive as one FNUMBER token and the
   NUMBER DOT NUMBER form may never match; confirm. */
col_default_clause:
('WITH')? 'DEFAULT'
| ('WITH')? 'DEFAULT' (NUMBER|NAME|NUMBER DOT NUMBER)
| ('WITH')? 'DEFAULT' 'USER'
| ('WITH')? 'DEFAULT' 'NULL'
| ('WITH')? 'DEFAULT' 'CURRENT_DATE'
| ('WITH')? 'DEFAULT' 'CURRENT_TIME'
| ('WITH')? 'DEFAULT' 'CURRENT_TIMESTAMP'
| ('WITH')? 'DEFAULT' cast_function_name LEFT_PAREN ((NUMBER|NAME)|'USER'|'CURRENT_DATE'|'CURRENT_TIME'|'CURRENT_TIMESTAMP') RIGHT_PAREN
;


/* Matches only the literal COLOPTIONSIDENTITYOPTIONS; presumably a placeholder
   (TODO confirm). */
col_options_identity_options:
    'COLOPTIONSIDENTITYOPTIONS'
;

/* Matches only the literal COLDATALINKOPTIONS; presumably a placeholder
   (TODO confirm). */
col_datalink_options:
    'COLDATALINKOPTIONS'
;

/* Column-level constraint: an optional CONSTRAINT name, then exactly one of
   PRIMARY KEY (a single literal token), UNIQUE, a references clause, or CHECK
   with a parenthesised condition. */
col_constraint:
    ('CONSTRAINT' constraintName=NAME)? (constrType1='PRIMARY KEY'|constrType2='UNIQUE'|constrType3=references_clause|constrType4='CHECK' LEFT_PAREN conCondition=check_condition RIGHT_PAREN)  #colConstraintDef
;

/* Matches only the literal REFCLAU; presumably a placeholder (TODO confirm). */
references_clause:
'REFCLAU'
;

/* Matches only the literal CHECKCONDI; presumably a placeholder (TODO confirm). */
check_condition: 
'CHECKCONDI'
;

/* Matches only the literal CFN; presumably a placeholder (TODO confirm). */
cast_function_name:
'CFN'
; 




/* -----------------------------------------------------------------------------------------------------
                                                DATATYPES
------------------------------------------------------------------------------------------------------*/
/* Column data type: dispatches to one of the type families below, or matches
   DATALINK (with optional length, ALLOCATE and CCSID parts) or ROWID directly. */
col_type: 
    col_type_simple #colTypeSimple
    |col_type_dec   #colTypeDec
    |col_type_float #colTypeFloat
    |col_type_chars #colTypeChars
    |col_type_graphic   #colTypeGraphic
    |col_type_binary    #colTypeBinary
    |col_type_date  #colTypeDate
    |'DATALINK' (NUMBER)? (allocate_clause)? (ccsid_clause)?    #colTypeDatalink
    |'ROWID'    #colTypeRowId
    ;

/* ALLOCATE followed by a NUMBER; no parentheses are expected around it. */
allocate_clause:
'ALLOCATE' NUMBER   #allocateClause
;

/* CCSID followed by a NUMBER and an optional normalization keyword.
   NOT NORMALIZED is one literal token (words joined by exactly one space). */
ccsid_clause:
'CCSID' NUMBER ('NORMALIZED'|'NOT NORMALIZED')? #ccsidClause
;

/* Integer types, each a single keyword. */
col_type_simple:
  'SMALLINT'    #colTypeSmallInt
 |'INT' #colTypeInt
 |'BIGINT' #colTypeBigInt  
 | 'INTEGER' #colTypeInteger 
 ;

/* NUMERIC, DECIMAL or DEC with an optional parenthesised precision and an
   optional second NUMBER after a comma. */
col_type_dec:
(dataType='NUMERIC'|(dataType='DECIMAL'|dataType='DEC')) (LEFT_PAREN dataTypePrecision=NUMBER (COMMA dataTypePrecision2=NUMBER)? RIGHT_PAREN)?  #colTypeDecimal
;

/* Floating-point types: REAL, FLOAT with optional precision, or DOUBLE with an
   optional PRECISION keyword. */
col_type_float:
'REAL'  #colTypeReal
| 'FLOAT' (LEFT_PAREN precision=NUMBER RIGHT_PAREN)?    #colTypeFloatDef
| 'DOUBLE' ('PRECISION')?   #colTypeDoubleDef
;

col_type_chars:
('CHAR'|'CHARACTER')  (LEFT_PAREN precision=NUMBER RIGHT_PAREN)?    (col_type_chars_mixed)? #colTypeChar
| ('VARCHAR'|'CHAR' 'VARYING'|'CHARACTER' 'VARYING') LEFT_PAREN precision=NUMBER RIGHT_PAREN (allocate_clause)?     col_type_chars_mixed #colTypeVarChar
| ('CHARACTER' 'LARGE OBJECT'|'CHAR' 'LARGE OBJECT'|'CLOB') ('(1M)'|NUMBER ('K'|'M'|'G'))? (allocate_clause)? col_type_chars_mixed  #colTypeCLOB
;

col_type_chars_mixed:
'FOR' ('BIT'|'SBCS'|'MIXED') 'DATA' #colTypeCharBitType
| ccsid_clause  #colTypeCharCCSID
;

/* Graphic string types.
   NOTE(review): the quoted literals (1) and (1M) each become one implicit
   lexer token, so that exact text can no longer be lexed as
   LEFT_PAREN NUMBER RIGHT_PAREN anywhere in the grammar. Likewise the literals
   K, M and G become keyword tokens, so a bare identifier spelled K, M or G is
   not lexed as NAME. */
col_type_graphic:
'GRAPHIC' ('(1)'|LEFT_PAREN NUMBER RIGHT_PAREN)? (ccsid_clause)?    #colTypeGraphicDef
| ('VARGRAPHIC'|'GRAPHIC' 'VARYING') LEFT_PAREN NUMBER RIGHT_PAREN (allocate_clause)? (ccsid_clause)? #colTypeVarGraphic
| 'DBCLOB' ('(1M)'| LEFT_PAREN NUMBER ('K'|'M'|'G') RIGHT_PAREN)? (allocate_clause)? (ccsid_clause)? #colTypeDBCLOB
;

/* Binary string types; they use the same (1) and (1M) literal tokens and the
   same K, M, G unit keywords as the graphic types. BINARY LARGE OBJECT is one
   literal token (words joined by exactly one space). */
col_type_binary:
'BINARY' ('(1)'|LEFT_PAREN NUMBER RIGHT_PAREN)?     #colTypeBinaryDef
| ('VARBINARY'|'BINARY' 'VARYING') LEFT_PAREN NUMBER RIGHT_PAREN (allocate_clause)?  #colTypeVarBinary
| ('BLOB'|'BINARY LARGE OBJECT') ('(1M)'| LEFT_PAREN NUMBER ('K'|'M'|'G') RIGHT_PAREN)? (allocate_clause)?  #colTypeBLOB
;

/* Date and time types. TIME accepts only an optional (0) and TIMESTAMP only an
   optional (6).
   NOTE(review): the quoted literals 0 and 6 become implicit tokens of their
   own; a lone 0 or 6 elsewhere in the input is then presumably lexed as that
   literal rather than as NUMBER; confirm. */
col_type_date:
'DATE'  #colTypeDateDef
| 'TIME' (LEFT_PAREN '0' RIGHT_PAREN)?  #colTypeTime 
| 'TIMESTAMP' (LEFT_PAREN '6' RIGHT_PAREN)? #colTypeTimestamp
;





/* LITERALS */

/* One or more digits. Listed before NAME, whose character class also contains
   digits, so an all-digit lexeme of equal length resolves to NUMBER. */
NUMBER  :   (DIGIT)+;  
/* Identifier made of letters, digits and underscores; it may begin with a digit. */
NAME: [a-zA-Z0-9_]+ ;
/* Digits, a dot, then digits. No parser rule visible in this grammar refers to
   FNUMBER. */
FNUMBER: (DIGIT)+ DOT (DIGIT)+;
/* Helper for the numeric rules; a fragment never produces a token by itself. */
fragment DIGIT :   '0'..'9' ;
/* Punctuation tokens. */
LEFT_PAREN : '(';
RIGHT_PAREN : ')';
COMMA : ',';
SEMICOLON : ';';
DOT :    '.';
/* Whitespace between tokens is discarded. */
WS : ( '\t' | ' ' | '\r' | '\n')+ -> skip;
当我现在试图解析像下面这样的 CHAR 列时

CREATE TABLE "TEST"."TEST2"  (
                  "IBMSNAP_OPERATION" CHAR(1));

Antlr导致错误

extraneous input '(1)' expecting {'FOREIGN KEY', 'CONSTRAINT', 'LIKE', 'UNIQUECONSTRAINT', 'UNIQUE', 'COLDATALINKOPTIONS', 'CHECKCONSTRAINT', 'CHECK', 'DEFAULT', 'GENERATED BY DEFAULT', 'NOT NULL', '"', 'WITH', 'PRIMARY KEY', 'REFCLAU', 'GENERATED ALWAYS', NAME, ')', ','}
但是尝试

CREATE TABLE "TEST"."TEST2"  (
                  "IBMSNAP_OPERATION" CHAR(3));
却能正常解析！有人知道这个奇怪问题的原因吗？


谢谢

问题在于您的语法中包含内联字面量 '(1)'，例如：

col_type_graphic:
    'GRAPHIC' ('(1)'...
因此，lexer 会把 (1) 整体视为单个标记，无法再按照 CHAR 定义的要求将其中的 1 解析为 NUMBER。

这不是你唯一的问题，你的语法中还有很多类似的问题。例如，规则 col_type_graphic 包含 'K'|'M'|'G'，这会把字母 K、M、G 声明为独立的标记并当作关键字处理；例如，一旦你想把某一列命名为 K，就会出现问题。

解决方案是,您必须将这些规则作为片段规则提取到lexer语法中,然后lexer将知道K本身不是词汇标记,只是与规则的其余部分结合在一起。但这并不简单,因为周围的规则不是词法规则,而是解析器规则,所以必须将其拆分为词法子规则和大型解析器规则


总而言之,要使该语法在Antlr中运行并生成有意义的解析树,需要做大量的工作。它的问题是,它包含了很多不应该成为关键字的文本,但是当您将它们定义为文本时,Antlr和许多其他解析器生成器将它们视为关键字。

非常感谢 gexicide！我之前并不知道这一点，不过在过去的项目中也遇到过类似的问题！看来我得多下点功夫了 ;)