; LDT 10/21/2023
; modified in several significant ways
; 1) Literal strings are replaced with numbers and ranges (%d32 & %d32-126, etc.) when possible.
;    TBS and especially TRG operators are much more efficient than TLS operators.
; 2) RFC 3986 IPv6address does not work because of APG's "first-success disambiguation" and "greedy" repetitions.
;    IPv6address redefined and validations moved to callback functions (semantic vs syntactic validation)
;    Redefinition requires negative look-ahead operators, https://en.wikipedia.org/wiki/Syntactic_predicate
;    That is SABNF instead of simple ABNF.
; 3) RFC 3986 IPv4address fails because of "first-success disambiguation".
;    This could be fixed with rearrangement of the alternative terms. However, it would still not
;    accept zero-padded (leading zeros) decimal octets.
;    Therefore, IPv4address is also done with callback functions and semantic validation.
; 4) The negative look-ahead operator is also needed in the definition of host to
;    prevent failure with a reg-name that begins with an IPv4 address.
; 5) NOTE: host = 1.1.1.256 is a valid host name even though it is an invalid IPv4address.
;          The IPv4address alternative fails but the reg-name alternative succeeds.


; ------------------------------------------------------------------------------
; RFC 3986

; Top-level rule: scheme, ":", hierarchical part, then an optional "?" query and an optional "#" fragment.
URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
; Four alternatives: an authority-based form introduced by "//", or one of three path-only forms.
; APG uses "first-success disambiguation", so the alternatives are tried in the order written.
hier-part     = "//" authority path-abempty
              / path-absolute
              / path-rootless
              / path-empty
; A letter followed by letters, digits, "+" (%d43), "-" or "." (%d45-46).
scheme        = ALPHA *( ALPHA / DIGIT / %d43 / %d45-46 )
; userinfo-at takes the place of the RFC 3986 form [ userinfo "@" ].
authority     = [ userinfo-at ] host [ ":" port ]
; Zero or more "/"-prefixed segments; matches the empty string.
path-abempty  = *( "/" segment )
; Begins with "/"; the first segment, if present, must be non-empty.
path-absolute = "/" [ segment-nz *( "/" segment ) ]
; Begins with a non-empty segment (no leading "/").
path-rootless = segment-nz *( "/" segment )
; The empty literal string: matches zero characters.
path-empty    = ""
; userinfo followed by "@" (%d64).
userinfo-at   = userinfo %d64
                ; userinfo redefined to include the "@" so that it will fail without it
                ; otherwise userinfo can match host and then the parser will backtrack
                ; incorrectly keeping the captured userinfo phrase
; Expanded form of *( unreserved / pct-encoded / sub-delims / ":" ):
;   %d97-122 / %d65-90 / %d48-57   a-z / A-Z / 0-9
;   %d33 / %d36                    "!" / "$"
;   %d38-46                        "&" "'" "(" ")" "*" "+" "," "-" "."
;   %d58-59 / %d61                 ":" ";" / "="
;   %d95 / %d126                   "_" / "~"
userinfo      = *(%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d58-59 / %d61 / %d95 / %d126)
; Three alternatives, tried in order: bracketed IP literal, IPv4 address, registered name.
; !reg-name-char is a negative look-ahead: the IPv4address alternative succeeds only if the
; character that follows it is not a reg-name-char.
host          = IP-literal / (IPv4address !reg-name-char) / reg-name
                ; negative look-ahead required to prevent IPv4address from being recognized as first part of reg-name
                ; same fix as https://github.com/garycourt/uri-js/issues/4
; An IPv6 or IPvFuture address enclosed in square brackets.
IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
; "v", one or more hex digits, ".", then one or more characters from the expanded form of
; unreserved / sub-delims / ":" (the same set as userinfo, but without pct-encoded).
IPvFuture     = "v" 1*HEXDIG "." 1*( %d97-122 / %d65-90 / %d48-57 / %d33 / %d36 /%d38-46 / %d58-59 /%d61 /%d95 / %d126 )
; Syntactic skeleton only: an address either without ("nodcolon") or with ("dcolon") a "::".
; Per the notes at the top of this file, validation is done in callback functions.
IPv6address   = nodcolon / dcolon
; No "::": one or more ":"-separated hex groups, optionally ending with ":" (%d58) and an IPv4address.
nodcolon      = (h16n *h16cn) [%d58 IPv4address]
; Contains "::" (%d58.58): optional hex groups before it; after it, either hex groups with an
; optional trailing ":" IPv4address, or an optional bare IPv4address.
dcolon        = [h16 *h16c] %d58.58 (((h16n *h16cn) [%d58 IPv4address]) / [IPv4address])
; One to four hex digits.
h16           = 1*4HEXDIG
; ":" followed by one to four hex digits.
h16c          = %d58 1*4HEXDIG
; As h16 and h16c, but with a negative look-ahead: the digits must not be followed by "." (%d46).
; NOTE(review): presumably this keeps the first octet of a trailing IPv4address from being
; consumed as a hex group - confirm against the callback functions.
h16n          = 1*4HEXDIG !%d46
h16cn         = %d58 1*4HEXDIG !%d46
; Four dot-separated decimal octets.
IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
; Here we will use callback functions to evaluate and validate the (possibly zero-padded) dec-octet.
; Syntactically a dec-octet is just one to three decimal digits; this rule performs no range check.
dec-octet     =  1*3dec-digit
; "0"-"9"
dec-digit     = %d48-57
; Zero or more reg-name-char; matches the empty string.
reg-name      = *reg-name-char
; Expanded form of unreserved / pct-encoded / sub-delims:
;   %d97-122 / %d65-90 / %d48-57   a-z / A-Z / 0-9
;   %d33 / %d36                    "!" / "$"
;   %d38-46                        "&" "'" "(" ")" "*" "+" "," "-" "."
;   %d59 / %d61                    ";" / "="   (":" is %d58 and is not included)
;   %d95 / %d126                   "_" / "~"
reg-name-char = %d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 / %d59 / %d61 /%d95 / %d126
; Zero or more decimal digits; matches the empty string.
port          = *DIGIT
; query and fragment share one definition: any number of pchar, "/" (%d47) or "?" (%d63).
query         = *(pchar / %d47 / %d63)
fragment      = *(pchar / %d47 / %d63)

; A path segment; may be empty.
segment       = *pchar
; A non-empty path segment.
segment-nz    = 1*pchar
; Expanded form of unreserved / pct-encoded / sub-delims / ":" / "@":
; the reg-name-char set plus ":" (in %d58-59) and "@" (%d64).
pchar         = (%d97-122 / %d65-90 / %d48-57 / pct-encoded / %d33 / %d36 / %d38-46 /%d58-59 / %d61 / %d64 / %d95 / %d126)
; "%" (%d37) followed by exactly two hex digits.
pct-encoded   = %d37 HEXDIG HEXDIG

; The rules below are no longer needed. They have been expanded in place wherever they were used,
; for fewer branches in the parse tree
; and more efficient use of the TBS & TRG operators in place of TLS and rule names.
; The RFC 3986 IPv6address does not work with APG, probably because of "first-success disambiguation"
; and greedy repetitions. It is replaced with semantic checking of the valid number of h16s.
;IPv6address   =                            6( h16 ":" ) ls32
;              /                       "::" 5( h16 ":" ) ls32
;              / [               h16 ] "::" 4( h16 ":" ) ls32
;              / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
;              / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
;              / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
;              / [ *4( h16 ":" ) h16 ] "::"              ls32
;              / [ *5( h16 ":" ) h16 ] "::"              h16
;              / [ *6( h16 ":" ) h16 ] "::"
;ls32          = ( h16 ":" h16 ) / IPv4address
; dec-octet does not work because of "first-success disambiguation".
; Must have the longest (3-digit) numbers first.
; Even so, this form does not accept leading zeros.
; There does not seem to be a clear standard for this (https://en.wikipedia.org/wiki/Dot-decimal_notation);
; however, an early RFC, RFC 790, did show leading-zero padding of the three digits.
;dec-octet     = DIGIT                 ; 0-9
;                 / %x31-39 DIGIT         ; 10-99
;                 / "1" 2DIGIT            ; 100-199
;                 / "2" %x30-34 DIGIT     ; 200-249
;                 / "25" %x30-35          ; 250-255
;statement = 1*( reserved / unreserved / " " )
;scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
;authority     = [ userinfo "@" ] host [ ":" port ]
;userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
;query         = *( pchar / "/" / "?" )
;fragment      = *( pchar / "/" / "?" )
;IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
;reg-name      = *( unreserved / pct-encoded / sub-delims )
;pct-encoded   = "%" HEXDIG HEXDIG
;pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
;path-empty    = 0pchar; deprecated - empty literal string, "", is more efficient 
;unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
;reserved      = gen-delims / sub-delims
;gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
;sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
;              / "*" / "+" / "," / ";" / "="
;HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"

; ------------------------------------------------------------------------------
; RFC 5234

; Core rules. ALPHA, LF and DIGIT use hexadecimal (%x) terminals; HEXDIG uses decimal (%d) terminals.
ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
; LF is not referenced by any other rule shown in this file.
LF             =  %x0A
                  ; linefeed
DIGIT          =  %x30-39
                  ; 0-9
; 0-9 (%d48-57) / A-F (%d65-70) / a-f (%d97-102): both letter cases are listed explicitly.
HEXDIG         = %d48-57 / %d65-70 / %d97-102

