contents
 

xml Language Reference


Regular Grammar

( legend )

 Extensible Markup Language
 Extensible Markup Language ( base tokens 1 )
let  Char  :: 

'\09' | '\0a' | '\0d' | '\x00000020' .. '\x0000d7ff' | '\x0000e000' .. '\x0000fffd' | '\x00010000' .. '\x0010ffff'


 
     
let  Space  :: 

'\09' | '\0a' | '\0d' | '\20'


 
     
let  BaseChar  :: 

'\x00000041' .. '\x0000005A' | '\x00000061' .. '\x0000007A' | '\x000000C0' .. '\x000000D6' | '\x000000D8' .. '\x000000F6' | '\x000000F8' .. '\x000000FF' | '\x00000100' .. '\x00000131' | '\x00000134' .. '\x0000013E' | '\x00000141' .. '\x00000148' | '\x0000014A' .. '\x0000017E' | '\x00000180' .. '\x000001C3' | '\x000001CD' .. '\x000001F0' | '\x000001F4' .. '\x000001F5' | '\x000001FA' .. '\x00000217' | '\x00000250' .. '\x000002A8' | '\x000002BB' .. '\x000002C1' | '\x00000386' | '\x00000388' .. '\x0000038A' | '\x0000038C' | '\x0000038E' .. '\x000003A1' | '\x000003A3' .. '\x000003CE' | '\x000003D0' .. '\x000003D6' | '\x000003DA' | '\x000003DC' | '\x000003DE' | '\x000003E0' | '\x000003E2' .. '\x000003F3' | '\x00000401' .. '\x0000040C' | '\x0000040E' .. '\x0000044F' | '\x00000451' .. '\x0000045C' | '\x0000045E' .. '\x00000481' | '\x00000490' .. '\x000004C4' | '\x000004C7' .. '\x000004C8' | '\x000004CB' .. '\x000004CC' | '\x000004D0' .. '\x000004EB' | '\x000004EE' .. '\x000004F5' | '\x000004F8' .. '\x000004F9' | '\x00000531' .. '\x00000556' | '\x00000559' | '\x00000561' .. '\x00000586' | '\x000005D0' .. '\x000005EA' | '\x000005F0' .. '\x000005F2' | '\x00000621' .. '\x0000063A' | '\x00000641' .. '\x0000064A' | '\x00000671' .. '\x000006B7' | '\x000006BA' .. '\x000006BE' | '\x000006C0' .. '\x000006CE' | '\x000006D0' .. '\x000006D3' | '\x000006D5' | '\x000006E5' .. '\x000006E6' | '\x00000905' .. '\x00000939' | '\x0000093D' | '\x00000958' .. '\x00000961' | '\x00000985' .. '\x0000098C' | '\x0000098F' .. '\x00000990' | '\x00000993' .. '\x000009A8' | '\x000009AA' .. '\x000009B0' | '\x000009B2' | '\x000009B6' .. '\x000009B9' | '\x000009DC' .. '\x000009DD' | '\x000009DF' .. '\x000009E1' | '\x000009F0' .. '\x000009F1' | '\x00000A05' .. '\x00000A0A' | '\x00000A0F' .. '\x00000A10' | '\x00000A13' .. '\x00000A28' | '\x00000A2A' .. '\x00000A30' | '\x00000A32' .. '\x00000A33' | '\x00000A35' .. '\x00000A36' | '\x00000A38' .. '\x00000A39' | '\x00000A59' .. 
'\x00000A5C' | '\x00000A5E' | '\x00000A72' .. '\x00000A74' | '\x00000A85' .. '\x00000A8B' | '\x00000A8D' | '\x00000A8F' .. '\x00000A91' | '\x00000A93' .. '\x00000AA8' | '\x00000AAA' .. '\x00000AB0' | '\x00000AB2' .. '\x00000AB3' | '\x00000AB5' .. '\x00000AB9' | '\x00000ABD' | '\x00000AE0' | '\x00000B05' .. '\x00000B0C' | '\x00000B0F' .. '\x00000B10' | '\x00000B13' .. '\x00000B28' | '\x00000B2A' .. '\x00000B30' | '\x00000B32' .. '\x00000B33' | '\x00000B36' .. '\x00000B39' | '\x00000B3D' | '\x00000B5C' .. '\x00000B5D' | '\x00000B5F' .. '\x00000B61' | '\x00000B85' .. '\x00000B8A' | '\x00000B8E' .. '\x00000B90' | '\x00000B92' .. '\x00000B95' | '\x00000B99' .. '\x00000B9A' | '\x00000B9C' | '\x00000B9E' .. '\x00000B9F' | '\x00000BA3' .. '\x00000BA4' | '\x00000BA8' .. '\x00000BAA' | '\x00000BAE' .. '\x00000BB5' | '\x00000BB7' .. '\x00000BB9' | '\x00000C05' .. '\x00000C0C' | '\x00000C0E' .. '\x00000C10' | '\x00000C12' .. '\x00000C28' | '\x00000C2A' .. '\x00000C33' | '\x00000C35' .. '\x00000C39' | '\x00000C60' .. '\x00000C61' | '\x00000C85' .. '\x00000C8C' | '\x00000C8E' .. '\x00000C90' | '\x00000C92' .. '\x00000CA8' | '\x00000CAA' .. '\x00000CB3' | '\x00000CB5' .. '\x00000CB9' | '\x00000CDE' | '\x00000CE0' .. '\x00000CE1' | '\x00000D05' .. '\x00000D0C' | '\x00000D0E' .. '\x00000D10' | '\x00000D12' .. '\x00000D28' | '\x00000D2A' .. '\x00000D39' | '\x00000D60' .. '\x00000D61' | '\x00000E01' .. '\x00000E2E' | '\x00000E30' | '\x00000E32' .. '\x00000E33' | '\x00000E40' .. '\x00000E45' | '\x00000E81' .. '\x00000E82' | '\x00000E84' | '\x00000E87' .. '\x00000E88' | '\x00000E8A' | '\x00000E8D' | '\x00000E94' .. '\x00000E97' | '\x00000E99' .. '\x00000E9F' | '\x00000EA1' .. '\x00000EA3' | '\x00000EA5' | '\x00000EA7' | '\x00000EAA' .. '\x00000EAB' | '\x00000EAD' .. '\x00000EAE' | '\x00000EB0' | '\x00000EB2' .. '\x00000EB3' | '\x00000EBD' | '\x00000EC0' .. '\x00000EC4' | '\x00000F40' .. '\x00000F47' | '\x00000F49' .. '\x00000F69' | '\x000010A0' .. '\x000010C5' | '\x000010D0' .. 
'\x000010F6' | '\x00001100' | '\x00001102' .. '\x00001103' | '\x00001105' .. '\x00001107' | '\x00001109' | '\x0000110B' .. '\x0000110C' | '\x0000110E' .. '\x00001112' | '\x0000113C' | '\x0000113E' | '\x00001140' | '\x0000114C' | '\x0000114E' | '\x00001150' | '\x00001154' .. '\x00001155' | '\x00001159' | '\x0000115F' .. '\x00001161' | '\x00001163' | '\x00001165' | '\x00001167' | '\x00001169' | '\x0000116D' .. '\x0000116E' | '\x00001172' .. '\x00001173' | '\x00001175' | '\x0000119E' | '\x000011A8' | '\x000011AB' | '\x000011AE' .. '\x000011AF' | '\x000011B7' .. '\x000011B8' | '\x000011BA' | '\x000011BC' .. '\x000011C2' | '\x000011EB' | '\x000011F0' | '\x000011F9' | '\x00001E00' .. '\x00001E9B' | '\x00001EA0' .. '\x00001EF9' | '\x00001F00' .. '\x00001F15' | '\x00001F18' .. '\x00001F1D' | '\x00001F20' .. '\x00001F45' | '\x00001F48' .. '\x00001F4D' | '\x00001F50' .. '\x00001F57' | '\x00001F59' | '\x00001F5B' | '\x00001F5D' | '\x00001F5F' .. '\x00001F7D' | '\x00001F80' .. '\x00001FB4' | '\x00001FB6' .. '\x00001FBC' | '\x00001FBE' | '\x00001FC2' .. '\x00001FC4' | '\x00001FC6' .. '\x00001FCC' | '\x00001FD0' .. '\x00001FD3' | '\x00001FD6' .. '\x00001FDB' | '\x00001FE0' .. '\x00001FEC' | '\x00001FF2' .. '\x00001FF4' | '\x00001FF6' .. '\x00001FFC' | '\x00002126' | '\x0000212A' .. '\x0000212B' | '\x0000212E' | '\x00002180' .. '\x00002182' | '\x00003041' .. '\x00003094' | '\x000030A1' .. '\x000030FA' | '\x00003105' .. '\x0000312C' | '\x0000AC00' .. '\x0000D7A3'


 
     
let  CombChar  :: 

'\x00000300' .. '\x00000345' | '\x00000360' .. '\x00000361' | '\x00000483' .. '\x00000486' | '\x00000591' .. '\x000005A1' | '\x000005A3' .. '\x000005B9' | '\x000005BB' .. '\x000005BD' | '\x000005BF' | '\x000005C1' .. '\x000005C2' | '\x000005C4' | '\x0000064B' .. '\x00000652' | '\x00000670' | '\x000006D6' .. '\x000006DC' | '\x000006DD' .. '\x000006DF' | '\x000006E0' .. '\x000006E4' | '\x000006E7' .. '\x000006E8' | '\x000006EA' .. '\x000006ED' | '\x00000901' .. '\x00000903' | '\x0000093C' | '\x0000093E' .. '\x0000094C' | '\x0000094D' | '\x00000951' .. '\x00000954' | '\x00000962' .. '\x00000963' | '\x00000981' .. '\x00000983' | '\x000009BC' | '\x000009BE' | '\x000009BF' | '\x000009C0' .. '\x000009C4' | '\x000009C7' .. '\x000009C8' | '\x000009CB' .. '\x000009CD' | '\x000009D7' | '\x000009E2' .. '\x000009E3' | '\x00000A02' | '\x00000A3C' | '\x00000A3E' | '\x00000A3F' | '\x00000A40' .. '\x00000A42' | '\x00000A47' .. '\x00000A48' | '\x00000A4B' .. '\x00000A4D' | '\x00000A70' .. '\x00000A71' | '\x00000A81' .. '\x00000A83' | '\x00000ABC' | '\x00000ABE' .. '\x00000AC5' | '\x00000AC7' .. '\x00000AC9' | '\x00000ACB' .. '\x00000ACD' | '\x00000B01' .. '\x00000B03' | '\x00000B3C' | '\x00000B3E' .. '\x00000B43' | '\x00000B47' .. '\x00000B48' | '\x00000B4B' .. '\x00000B4D' | '\x00000B56' .. '\x00000B57' | '\x00000B82' .. '\x00000B83' | '\x00000BBE' .. '\x00000BC2' | '\x00000BC6' .. '\x00000BC8' | '\x00000BCA' .. '\x00000BCD' | '\x00000BD7' | '\x00000C01' .. '\x00000C03' | '\x00000C3E' .. '\x00000C44' | '\x00000C46' .. '\x00000C48' | '\x00000C4A' .. '\x00000C4D' | '\x00000C55' .. '\x00000C56' | '\x00000C82' .. '\x00000C83' | '\x00000CBE' .. '\x00000CC4' | '\x00000CC6' .. '\x00000CC8' | '\x00000CCA' .. '\x00000CCD' | '\x00000CD5' .. '\x00000CD6' | '\x00000D02' .. '\x00000D03' | '\x00000D3E' .. '\x00000D43' | '\x00000D46' .. '\x00000D48' | '\x00000D4A' .. '\x00000D4D' | '\x00000D57' | '\x00000E31' | '\x00000E34' .. '\x00000E3A' | '\x00000E47' .. 
'\x00000E4E' | '\x00000EB1' | '\x00000EB4' .. '\x00000EB9' | '\x00000EBB' .. '\x00000EBC' | '\x00000EC8' .. '\x00000ECD' | '\x00000F18' .. '\x00000F19' | '\x00000F35' | '\x00000F37' | '\x00000F39' | '\x00000F3E' | '\x00000F3F' | '\x00000F71' .. '\x00000F84' | '\x00000F86' .. '\x00000F8B' | '\x00000F90' .. '\x00000F95' | '\x00000F97' | '\x00000F99' .. '\x00000FAD' | '\x00000FB1' .. '\x00000FB7' | '\x00000FB9' | '\x000020D0' .. '\x000020DC' | '\x000020E1' | '\x0000302A' .. '\x0000302F' | '\x00003099' | '\x0000309A'


 
     
let  Extender  :: 

'\x000000B7' | '\x000002D0' | '\x000002D1' | '\x00000387' | '\x00000640' | '\x00000E46' | '\x00000EC6' | '\x00003005' | '\x00003031' .. '\x00003035' | '\x0000309D' .. '\x0000309E' | '\x000030FC' .. '\x000030FE'


 
     
let  Digit  :: 

'\x00000030' .. '\x00000039' | '\x00000660' .. '\x00000669' | '\x000006F0' .. '\x000006F9' | '\x00000966' .. '\x0000096F' | '\x000009E6' .. '\x000009EF' | '\x00000A66' .. '\x00000A6F' | '\x00000AE6' .. '\x00000AEF' | '\x00000B66' .. '\x00000B6F' | '\x00000BE7' .. '\x00000BEF' | '\x00000C66' .. '\x00000C6F' | '\x00000CE6' .. '\x00000CEF' | '\x00000D66' .. '\x00000D6F' | '\x00000E50' .. '\x00000E59' | '\x00000ED0' .. '\x00000ED9' | '\x00000F20' .. '\x00000F29'


 
     
let  Ideograf  :: 

'\x00004E00' .. '\x00009FA5' | '\x00003007' | '\x00003021' .. '\x00003029'


 
     
let  Letter  :: 

BaseChar | Ideograf


 
     
let  PubChar  :: 

'\20' | '\0d' | '\0a' | HexChar | '-\'()+,./:=?;!*#@$_%'


 
     
let  HexChar  :: 

'0' .. '9' | 'a' .. 'z' | 'A' .. 'Z'


 
     
let  NameChar  :: 

Letter | Digit | '.:-_' | CombChar | Extender


 
     
let  KANY  :: 

'Aa' 'Nn' 'Yy'


 
     
let  KATTLIST  :: 

'Aa' 'Tt' 'Tt' 'Ll' 'Ii' 'Ss' 'Tt'


 
     
let  KCDATA  :: 

'Cc' 'Dd' 'Aa' 'Tt' 'Aa'


 
     
let  KELEMENT  :: 

'Ee' 'Ll' 'Ee' 'Mm' 'Ee' 'Nn' 'Tt'


 
     
let  KEMPTY  :: 

'Ee' 'Mm' 'Pp' 'Tt' 'Yy'


 
     
let  KENCODING  :: 

'Ee' 'Nn' 'Cc' 'Oo' 'Dd' 'Ii' 'Nn' 'Gg'


 
     
let  KENTITY  :: 

'Ee' 'Nn' 'Tt' 'Ii' 'Tt' 'Yy'


 
     
let  KENTITIES  :: 

'Ee' 'Nn' 'Tt' 'Ii' 'Tt' 'Ii' 'Ee' 'Ss'


 
     
let  KFIXED  :: 

'Ff' 'Ii' 'Xx' 'Ee' 'Dd'


 
     
let  KID  :: 

'Ii' 'Dd'


 
     
let  KIDREF  :: 

'Ii' 'Dd' 'Rr' 'Ee' 'Ff'


 
     
let  KIDREFS  :: 

'Ii' 'Dd' 'Rr' 'Ee' 'Ff' 'Ss'


 
     
let  KIGNORE  :: 

'Ii' 'Gg' 'Nn' 'Oo' 'Rr' 'Ee'


 
     
let  KIMPLIED  :: 

'Ii' 'Mm' 'Pp' 'Ll' 'Ii' 'Ee' 'Dd'


 
     
let  KINCLUDE  :: 

'Ii' 'Nn' 'Cc' 'Ll' 'Uu' 'Dd' 'Ee'


 
     
let  KNDATA  :: 

'Nn' 'Dd' 'Aa' 'Tt' 'Aa'


 
     
let  KNMTOKEN  :: 

'Nn' 'Mm' 'Tt' 'Oo' 'Kk' 'Ee' 'Nn'


 
     
let  KNMTOKENS  :: 

'Nn' 'Mm' 'Tt' 'Oo' 'Kk' 'Ee' 'Nn' 'Ss'


 
     
let  KNOTATION  :: 

'Nn' 'Oo' 'Tt' 'Aa' 'Tt' 'Ii' 'Oo' 'Nn'


 
     
let  KNO  :: 

'Nn' 'Oo'


 
     
let  KPCDATA  :: 

'Pp' 'Cc' 'Dd' 'Aa' 'Tt' 'Aa'


 
     
let  KPUBLIC  :: 

'Pp' 'Uu' 'Bb' 'Ll' 'Ii' 'Cc'


 
     
let  KREQUIRED  :: 

'Rr' 'Ee' 'Qq' 'Uu' 'Ii' 'Rr' 'Ee' 'Dd'


 
     
let  KSTANDALONE  :: 

'Ss' 'Tt' 'Aa' 'Nn' 'Dd' 'Aa' 'Ll' 'Oo' 'Nn' 'Ee'


 
     
let  KSYSTEM  :: 

'Ss' 'Yy' 'Ss' 'Tt' 'Ee' 'Mm'


 
     
let  KVERSION  :: 

'Vv' 'Ee' 'Rr' 'Ss' 'Ii' 'Oo' 'Nn'


 
     
let  KXML  :: 

'Xx' 'Mm' 'Ll'


 
     
let  KYES  :: 

'Yy' 'Ee' 'Ss'


 
     
let  KDOCTYPE  :: 

'Dd' 'Oo' 'Cc' 'Tt' 'Yy' 'Pp' 'Ee'


 
     
let  Keyword  :: 

KANY | KATTLIST | KCDATA | KDOCTYPE | KELEMENT | KEMPTY | KENCODING | KENTITY | KENTITIES | KFIXED | KID | KIDREF | KIDREFS | KIGNORE | KIMPLIED | KINCLUDE | KNDATA | KNMTOKEN | KNMTOKENS | KNO | KNOTATION | KPCDATA | KPUBLIC | KREQUIRED | KSTANDALONE | KSYSTEM | KVERSION | KXML | KYES


 
     
let  Ide  :: 

( Letter | '_' | ':' ) { NameChar }


 
     
tok  DCharRef  :: 

"&#" ( '0' .. '9' ) + ";"


 
     
tok  HCharRef  :: 

"&#x" HexChar + ";"


 
     
tok  ERef  :: 

"&" Ide ";"


 
     
tok  PERef  :: 

"%" Ide ";"


 
     
 Extensible Markup Language ( base tokens 2 )
tok  Nmtoken  :: 

( NameChar - ( Letter | '_' | ':' ) ) { NameChar }


 
     
let  EQ  :: 

{ Space } '=' { Space }


 
     
tok  XMLDecl  :: 

"<?" KXML [ Space + KVERSION EQ '\'\"' ( HexChar | '.:-_' ) + '\'\"' ] [ Space + KENCODING EQ '\'\"' ( HexChar | '-_' ) + '\'\"' ] [ Space + KSTANDALONE EQ '\'\"' ( KYES | KNO ) '\'\"' ] { Space } "?>"


 
     
tok  PI  :: 

( "<?" Ide ( { Char } - ( { Char } "?>" { Char } ) ) "?>" ) - XMLDecl


 
     
tok  CDSect  :: 

"<![" KCDATA "[" ( { Char } - ( { Char } "]]>" { Char } ) ) "]]>"


 
     
tok  ElmStart  :: 

"<" | "</" | "<!"


 
     
tok  TagEnd  :: 

">" | "/>"


 
     
tok  XMLOpr  :: 

'=,*?+|()[]%#' | "<![" | "]]>" | "?>" | ")*"


 
     
 xml-relevant tokens
tok  Name  :: 

Ide


 
     
lan  dtd:dtdEmbed:dtd  :: 

dtd

 

 
     
tok  DTDStart:dtdEmbed  :: 

"<!" KDOCTYPE


 
     
tok  Literal  :: 

"\"" { Char - '<\"' } "\"" | "\'" { Char - '<\'' } "\'"


 
     
let  DChar  :: 

Char - ( '<>&=,*?+|()[]%#/\'\"' | Space | NameChar )


 
     
tok  CharData  :: 

DChar +


 
     
tok  Empty  :: 

Space +


 
     
tok  Comment  :: 

"<!--" ( { Char } - ( { Char } "--" { Char } ) ) "-->"


 



Context-free Grammar

( legend )


start  XDoc  :: 

[ XMLDecl ] Content


 
     
let  Attr  :: 

[ Empty ] Name [ Empty ] "=" [ Empty ] Literal | [ Empty ]


 
     
let  Attrs  :: 

[ ( Attr Attrs ) + ] { Attr }


 
     
let  Content  :: 

[ CDecl Content ]


 
     
let  EBody  :: 

"/>"


 
     
   | 

">" Content "</" Name [ Empty ] ">"

 

 
     
let  CDecl  :: 

DTDStart dtdEmbed


 
     
   | 

CharData

 

 
     
   | 

Literal

 

 
     
   | 

Nmtoken

 

 
     
   | 

DCharRef

 

 
     
   | 

HCharRef

 

 
     
   | 

XMLOpr

 

 
     
   | 

"/>"

 

 
     
   | 

"<" Name Attrs EBody

 

 
     
   | 

">"

 

 
     
   | 

CDSect

 

 
     
   | 

Name

 

 
     
   | 

_other_

 

 
     
   | 

ERef

 

 
     
   | 

PERef

 

 
     
   | 

"="

 

 
     
   | 

Comment

 

 
     
   | 

[ Empty ]

 

 
     
   | 

PI