libcruft-util/rfc3986.rl

148 lines
4.1 KiB
Plaintext
Raw Normal View History

%%{
machine rfc3986;
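# Action names associated with this machine. No action bodies are defined in
# this section, so they are presumably supplied by the host file that
# includes it (TODO confirm). The grammar below embeds the *_begin/*_end
# pairs for scheme, hier, user, host, port, authority, path, query and
# fragment; trace, success, failure, uri_begin and uri_end are not
# referenced in this section.
#action trace;
#action success;
#action failure;
#action scheme_begin;
#action scheme_end;
#action hier_begin;
#action hier_end;
#action user_begin;
#action user_end;
#action host_begin;
#action host_end;
#action port_begin;
#action port_end;
#action authority_begin;
#action authority_end;
#action path_begin;
#action path_end;
#action query_begin;
#action query_end;
#action fragment_begin;
#action fragment_end;
#action uri_begin;
#action uri_end;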
## Characters
# Basic character classes; the delimiter sets are written as or-expressions.
unreserved  = alnum | [\-._~];
pct_encoded = '%' xdigit{2};
gen_delim   = [:/?#\[\]@];
sub_delim   = [!$&'()*+,;=];
# Path character.
#
# The commented-out rule below is the RFC-style definition plus a double
# quote; the double quote was allowed because it is quite common in real
# life and there is no great way to work around it here.
#
# pchar = unreserved | pct_encoded | sub_delim | ':' | '@' | '"';
#
# The active rule is more permissive still: it accepts any character other
# than '%', '/', '?' and '#', or a complete percent-encoded triplet.
pchar = (any - ('%' | '/' | '?' | '#')) | pct_encoded;
## Atoms
# Registered name: may be empty.
reg_name = (pct_encoded | unreserved | sub_delim)*;

## IP-address
## Note: The address grammar is embedded in the RFC so we embed it too
# A decimal octet, 0-255, without superfluous leading zeros.
dec_octet =
    digit
  | [1-9] digit
  | '1' digit{2}
  | '2' [0-4] digit
  | '25' [0-5]
  ;
ipv4address = dec_octet ('.' dec_octet){3};

# One to four hex digits; ls32 is the low 32 bits of an IPv6 address,
# written either as two h16 groups or as a dotted IPv4 address.
h16  = xdigit{1,4};
ls32 = ipv4address | (h16 ':' h16);
# IPv6 address: one alternative per permitted position of the "::" elision,
# each fixing how many h16 groups may precede and must follow it.
ipv6address =
    (h16 ':'){6} ls32
  | '::' (h16 ':'){5} ls32
  | h16? '::' (h16 ':'){4} ls32
  | ((h16 ':')? h16)? '::' (h16 ':'){3} ls32
  | ((h16 ':'){0,2} h16)? '::' (h16 ':'){2} ls32
  | ((h16 ':'){0,3} h16)? '::' h16 ':' ls32
  | ((h16 ':'){0,4} h16)? '::' ls32
  | ((h16 ':'){0,5} h16)? '::' h16
  | ((h16 ':'){0,6} h16)? '::'
  ;
# IPvFuture: version flag, one or more hex digits, '.', then one or more of
# unreserved / sub_delim / ':'.
# RFC 3986 writes the flag as the ABNF string "v", and ABNF quoted strings
# are case-insensitive (RFC 5234, section 2.3), so both 'v' and 'V' must be
# accepted here.
ipvfuture = [vV] xdigit+ '.' (unreserved | sub_delim | ':')+;
# Bracketed literal host: an IPv6 or IPvFuture address between '[' and ']'.
ip_literal = '[' (ipv6address | ipvfuture) ']';
## Segments
# segment may be empty; the _nz forms need at least one character, and
# segment_nz_nc is built from classes that do not contain ':'.
segment       = pchar*;
segment_nz    = pchar+;
segment_nz_nc = (unreserved | pct_encoded | sub_delim | '@')+;
## Paths
# path_abempty doubles as the "zero or more '/'-prefixed segments" tail of
# the other path forms.
path_abempty  = ('/' segment)*;
path_absolute = '/' (segment_nz path_abempty)?;
path_noscheme = segment_nz_nc path_abempty;
path_rootless = segment_nz path_abempty;
path_empty    = zlen;
path =
    path_abempty
  | path_absolute
  | path_noscheme
  | path_rootless
  | path_empty
  ;
reserved = gen_delim | sub_delim;
## Authority
# Each component is wrapped in parentheses so that its entering (>) and
# leaving (%) actions apply to the whole component.

# Port: zero or more digits.
port = (digit*) >port_begin %port_end;

# Host: bracketed IP literal, dotted IPv4 address, or registered name.
host = (ip_literal | ipv4address | reg_name) >host_begin %host_end;

# User information: may be empty; ':' is permitted inside it.
userinfo = ((unreserved | pct_encoded | sub_delim | ':')*) >user_begin %user_end;

# authority = [ userinfo '@' ] host [ ':' port ]
authority = ((userinfo '@')? host (':' port)?) >authority_begin %authority_end;
## URI components
# Scheme: a letter followed by letters, digits, '+', '-' or '.'.
scheme = (alpha (alnum | [+\-.])*) >scheme_begin %scheme_end;

# Query and fragment share one body: any run of pchar, '/' or '?'.
query    = ((pchar | [/?])*) >query_begin %query_end;
fragment = ((pchar | [/?])*) >fragment_begin %fragment_end;
## URI types
# hier_part: either '//' followed by an authority and a possibly empty
# slash-prefixed path, or one of the authority-less path forms (absolute,
# rootless, or empty).
# Action embeddings bind more tightly than concatenation and union in Ragel,
# so in the first branch hier_begin/hier_end wrap only the parenthesised
# authority-plus-path group (not the leading '//'), and path_begin/path_end
# wrap only path_abempty.
hier_part = (
(
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
) | (
# Each path alternative carries its own path_begin/path_end; the hier
# actions wrap the alternation as a whole.
path_absolute >path_begin %path_end
| path_rootless >path_begin %path_end
| path_empty >path_begin %path_end
) >hier_begin %hier_end
);
# Full URI: scheme, ':', hierarchical part, then optional '?' query and
# optional '#' fragment.
uri = scheme ':' hier_part ('?' query)? ('#' fragment)?;
# relative_part: the scheme-less counterpart of a hierarchical part. The
# authority-less branch uses path_noscheme where a rootless path would
# otherwise appear.
# As action embeddings bind more tightly than concatenation and union,
# hier_begin/hier_end in the first branch wrap the parenthesised group after
# '//', and each path alternative in the second branch has its own
# path_begin/path_end.
relative_part = (
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
) | (
path_absolute >path_begin %path_end
| path_noscheme >path_begin %path_end
| path_empty >path_begin %path_end
) >hier_begin %hier_end;
# Relative reference: relative part with optional '?' query and '#' fragment.
relative_ref = relative_part ('?' query)? ('#' fragment)?;
# A URI reference is either a full URI or a relative reference.
uri_reference = uri | relative_ref;
# Absolute URI: scheme, ':', hierarchical part and optional query, with no
# fragment.
absolute_uri = scheme ':' hier_part ('?' query)?;
# Top-level rule: accepts an absolute URI or any URI reference.
# NOTE(review): every input matched by absolute_uri is also matched by the
# uri branch of uri_reference, so the first alternative adds no new inputs;
# confirm whether it is kept deliberately (e.g. for its effect on embedded
# actions) before simplifying.
URI = (
absolute_uri | uri_reference
);
}%%