uri: extract grammar into contained ragel file
This will allow us to reference the grammar from other grammars, e.g. HTTP parsing.
This commit is contained in:
parent
202c22eee8
commit
d0d5ae549e
@ -27,7 +27,7 @@ endif()
|
||||
|
||||
###############################################################################
|
||||
RAGEL_TARGET(json-flat json/flat.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/json/flat.cpp COMPILE_FLAGS -G2)
|
||||
RAGEL_TARGET(uri uri.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/uri.cpp)
|
||||
# Generate uri.cpp from uri.cpp.rl, using the same -G2 flag as the json-flat target.
# NOTE(review): uri.cpp.rl now pulls in rfc3986.rl via `include`; confirm that
# RAGEL_TARGET tracks that file as a dependency so edits to it trigger regeneration.
RAGEL_TARGET(uri uri.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/uri.cpp COMPILE_FLAGS -G2)
|
||||
RAGEL_TARGET(version version.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/version.cpp)
|
||||
|
||||
|
||||
|
67
abnf.rl
Normal file
67
abnf.rl
Normal file
@ -0,0 +1,67 @@
|
||||
void foo (void) { }
|
||||
|
||||
%%{
|
||||
|
||||
machine rfc5234;
|
||||
|
||||
###############################################################################
|
||||
# RFC5234 ABNF core rules
|
||||
|
||||
# ; A-Z / a-z
|
||||
ALPHA = 0x41..0x5A | 0x61..0x7A;
|
||||
BIT = '0' | '1';
|
||||
|
||||
# any 7-bit US-ASCII character, excluding NUL
|
||||
CHAR = 0x01..0x7F;
|
||||
|
||||
# carriage return
|
||||
CR = 0x0D;
|
||||
|
||||
# linefeed
|
||||
LF = 0x0A;
|
||||
|
||||
# Internet standard newline
|
||||
CRLF = CR LF;
|
||||
|
||||
# controls
|
||||
CTL = 0x00..0x1F | 0x7F;
|
||||
|
||||
# 0-9
|
||||
DIGIT = 0x30..0x39;
|
||||
|
||||
# " (Double Quote)
|
||||
DQUOTE = 0x22;
|
||||
|
||||
# hexadecimal digit; RFC 5234 quoted strings are case-insensitive, so a-f
# must be accepted alongside A-F (ragel character ranges are case-sensitive)
HEXDIG = DIGIT | 'A'..'F' | 'a'..'f';
|
||||
|
||||
# horizontal tab
|
||||
HTAB = 0x09;
|
||||
|
||||
SP = 0x20;
|
||||
|
||||
# white space
|
||||
WSP = SP | HTAB;
|
||||
|
||||
# Use of this linear-white-space rule permits lines containing only white space
|
||||
# that are no longer legal in mail headers and have caused interoperability
|
||||
# problems in other contexts.
|
||||
#
|
||||
# Do not use when defining mail headers and use with caution in other contexts.
|
||||
LWSP = (WSP | CRLF WSP)*;
|
||||
|
||||
# 8 bits of data
|
||||
OCTET = any; #0x00..0xFF;
|
||||
|
||||
# visible (printing) characters
|
||||
VCHAR = 0x21..0x7E;
|
||||
|
||||
write data;
|
||||
|
||||
}%%
|
||||
|
||||
int main () {
|
||||
|
||||
|
||||
%%write init;
|
||||
%%write exec;
|
||||
}
|
140
rfc3986.rl
Normal file
140
rfc3986.rl
Normal file
@ -0,0 +1,140 @@
|
||||
%%{
|
||||
machine rfc3986;
|
||||
|
||||
#action trace;
|
||||
#action success;
|
||||
#action failure;
|
||||
|
||||
#action scheme_begin;
|
||||
#action scheme_end;
|
||||
#action hier_begin;
|
||||
#action hier_end;
|
||||
#action user_begin;
|
||||
#action user_end;
|
||||
#action host_begin;
|
||||
#action host_end;
|
||||
#action port_begin;
|
||||
#action port_end;
|
||||
#action authority_begin;
|
||||
#action authority_end;
|
||||
#action path_begin;
|
||||
#action path_end;
|
||||
#action query_begin;
|
||||
#action query_end;
|
||||
#action fragment_begin;
|
||||
#action fragment_end;
|
||||
|
||||
#action uri_begin;
|
||||
#action uri_end;
|
||||
|
||||
## Characters
|
||||
unreserved = alpha | digit | "-" | "." | "_" | "~";
|
||||
pct_encoded = '%' xdigit xdigit;
|
||||
gen_delim = ":" | "/" | "?" | "#" | "[" | "]" | "@";
|
||||
sub_delim = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=";
|
||||
pchar = unreserved | pct_encoded | sub_delim | ':' | '@';
|
||||
|
||||
## Atoms
|
||||
reg_name = (unreserved | pct_encoded | sub_delim)*;
|
||||
|
||||
## IP-address
|
||||
## Note: The address grammar is embedded in the RFC so we embed it too
|
||||
dec_octet = digit | [1-9] digit | '1' digit{2} | '2' [0-4] digit | '25' [0-5];
|
||||
|
||||
ipv4address = dec_octet '.' dec_octet '.' dec_octet '.' dec_octet;
|
||||
|
||||
h16 = xdigit{1,4};
|
||||
ls32 = (h16 ":" h16) | ipv4address;
|
||||
|
||||
ipv6address =
|
||||
(h16 ":"){6} ls32
|
||||
| "::" (h16 ":"){5} ls32
|
||||
| ( h16)? "::" (h16 ":"){4} ls32
|
||||
| ((h16 ":"){0,1} h16)? "::" (h16 ":"){3} ls32
|
||||
| ((h16 ":"){0,2} h16)? "::" (h16 ":"){2} ls32
|
||||
| ((h16 ":"){0,3} h16)? "::" (h16 ":"){1} ls32
|
||||
| ((h16 ":"){0,4} h16)? "::" ls32
|
||||
| ((h16 ":"){0,5} h16)? "::" h16
|
||||
| ((h16 ":"){0,6} h16)? "::"
|
||||
;
|
||||
|
||||
ipvfuture = 'v' xdigit{1,} '.' (unreserved | sub_delim | ':'){1,};
|
||||
ip_literal = '[' (ipv6address | ipvfuture) ']';
|
||||
|
||||
## Segments
|
||||
segment = pchar*;
|
||||
segment_nz = pchar{1,};
|
||||
segment_nz_nc = (unreserved | pct_encoded | sub_delim | '@'){1,};
|
||||
|
||||
## Paths
|
||||
path_abempty = ('/' segment)*;
|
||||
path_absolute = '/' (segment_nz ('/' segment)*)?;
|
||||
path_noscheme = segment_nz_nc ('/' segment)*;
|
||||
path_rootless = segment_nz ('/' segment)*;
|
||||
# RFC 3986: path-empty = 0<pchar>, i.e. zero characters -- not a literal '0'
# followed by a pchar. zlen is ragel's builtin zero-length machine.
path_empty = zlen;
|
||||
|
||||
path = (
|
||||
path_abempty | path_absolute | path_noscheme | path_rootless | path_empty
|
||||
);
|
||||
|
||||
reserved = gen_delim | sub_delim;
|
||||
|
||||
## Authority
|
||||
port = (
|
||||
digit*
|
||||
) >port_begin %port_end;
|
||||
|
||||
host = (
|
||||
ip_literal | ipv4address | reg_name
|
||||
) >host_begin %host_end;
|
||||
|
||||
userinfo = (
|
||||
(unreserved | pct_encoded | sub_delim | ':')*
|
||||
) >user_begin %user_end;
|
||||
|
||||
authority = (
|
||||
(userinfo '@')? host (':' port)?
|
||||
) >authority_begin %authority_end;
|
||||
|
||||
|
||||
## URI components
|
||||
scheme = (
|
||||
alpha (alpha | digit | '+' | '-' | '.')*
|
||||
) >scheme_begin %scheme_end;
|
||||
|
||||
query = (
|
||||
(pchar | '/' | '?')*
|
||||
) >query_begin %query_end;
|
||||
|
||||
fragment = (
|
||||
(pchar | '/' | '?')*
|
||||
) >fragment_begin %fragment_end;
|
||||
|
||||
## URI types
|
||||
hier_part = (
|
||||
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
|
||||
) | (
|
||||
path_absolute >path_begin %path_end
|
||||
| path_rootless >path_begin %path_end
|
||||
| path_empty >path_begin %path_end
|
||||
) >hier_begin %hier_end;
|
||||
|
||||
# RFC 3986 section 3: URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
# Both the query and the fragment are optional.
uri = scheme ':' hier_part ('?' query)? ('#' fragment)?;
|
||||
|
||||
relative_part =
|
||||
'//' authority path_abempty >path_begin %path_end
|
||||
| path_absolute >path_begin %path_end
|
||||
| path_noscheme >path_begin %path_end
|
||||
| path_empty >path_begin %path_end
|
||||
;
|
||||
|
||||
# RFC 3986 section 4.2: relative-ref = relative-part [ "?" query ] [ "#" fragment ]
# Both the query and the fragment are optional.
relative_ref = relative_part ('?' query)? ('#' fragment)?;
|
||||
|
||||
uri_reference = uri | relative_ref;
|
||||
|
||||
absolute_uri = scheme ':' hier_part ('?' query)?;
|
||||
|
||||
URI = (
|
||||
absolute_uri | uri_reference
|
||||
) >uri_begin %uri_end;
|
||||
}%%
|
114
uri.cpp.rl
114
uri.cpp.rl
@ -27,7 +27,7 @@
|
||||
|
||||
|
||||
%%{
|
||||
machine uri;
|
||||
machine impl;
|
||||
|
||||
action trace { if (0) std::cerr << *p; }
|
||||
action success {__success = true; }
|
||||
@ -60,116 +60,12 @@
|
||||
action fragment_begin { m_views[FRAGMENT] = { p, p}; }
|
||||
action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }
|
||||
|
||||
## Characters
|
||||
unreserved = alpha | digit | "-" | "." | "_" | "~";
|
||||
pct_encoded = '%' xdigit xdigit;
|
||||
gen_delim = ":" | "/" | "?" | "#" | "[" | "]" | "@";
|
||||
sub_delim = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=";
|
||||
pchar = unreserved | pct_encoded | sub_delim | ':' | '@';
|
||||
action uri_begin {}
|
||||
action uri_end {}
|
||||
|
||||
## Atoms
|
||||
reg_name = (unreserved | pct_encoded | sub_delim)*;
|
||||
include rfc3986 'rfc3986.rl';
|
||||
|
||||
## IP-address
|
||||
## Note: The address grammar is embedded in the RFC so we embed it too
|
||||
dec_octet = digit | [1-9] digit | '1' digit{2} | '2' [0-4] digit | '25' [0-5];
|
||||
|
||||
ipv4address = dec_octet '.' dec_octet '.' dec_octet '.' dec_octet;
|
||||
|
||||
h16 = xdigit{1,4};
|
||||
ls32 = (h16 ":" h16) | ipv4address;
|
||||
|
||||
ipv6address =
|
||||
(h16 ":"){6} ls32
|
||||
| "::" (h16 ":"){5} ls32
|
||||
| ( h16)? "::" (h16 ":"){4} ls32
|
||||
| ((h16 ":"){0,1} h16)? "::" (h16 ":"){3} ls32
|
||||
| ((h16 ":"){0,2} h16)? "::" (h16 ":"){2} ls32
|
||||
| ((h16 ":"){0,3} h16)? "::" (h16 ":"){1} ls32
|
||||
| ((h16 ":"){0,4} h16)? "::" ls32
|
||||
| ((h16 ":"){0,5} h16)? "::" h16
|
||||
| ((h16 ":"){0,6} h16)? "::"
|
||||
;
|
||||
|
||||
ipvfuture = 'v' xdigit{1,} '.' (unreserved | sub_delim | ':'){1,};
|
||||
ip_literal = '[' (ipv6address | ipvfuture) ']';
|
||||
|
||||
## Segments
|
||||
segment = pchar*;
|
||||
segment_nz = pchar{1,};
|
||||
segment_nz_nc = (unreserved | pct_encoded | sub_delim | '@'){1,};
|
||||
|
||||
## Paths
|
||||
path_abempty = ('/' segment)*;
|
||||
path_absolute = '/' (segment_nz ('/' segment)*)?;
|
||||
path_noscheme = segment_nz_nc ('/' segment)*;
|
||||
path_rootless = segment_nz ('/' segment)*;
|
||||
path_empty = '0' pchar;
|
||||
|
||||
path = (
|
||||
path_abempty | path_absolute | path_noscheme | path_rootless | path_empty
|
||||
);
|
||||
|
||||
reserved = gen_delim | sub_delim;
|
||||
|
||||
## Authority
|
||||
port = (
|
||||
digit*
|
||||
) >port_begin %port_end;
|
||||
|
||||
host = (
|
||||
ip_literal | ipv4address | reg_name
|
||||
) >host_begin %host_end;
|
||||
|
||||
userinfo = (
|
||||
(unreserved | pct_encoded | sub_delim | ':')*
|
||||
) >user_begin %user_end;
|
||||
|
||||
authority = (
|
||||
(userinfo '@')? host (':' port)?
|
||||
) >authority_begin %authority_end;
|
||||
|
||||
|
||||
## URI components
|
||||
scheme = (
|
||||
alpha (alpha | digit | '+' | '-' | '.')*
|
||||
) >scheme_begin %scheme_end;
|
||||
|
||||
query = (
|
||||
(pchar | '/' | '?')*
|
||||
) >query_begin %query_end;
|
||||
|
||||
fragment = (
|
||||
(pchar | '/' | '?')*
|
||||
) >fragment_begin %fragment_end;
|
||||
|
||||
## URI types
|
||||
hier_part = (
|
||||
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
|
||||
) | (
|
||||
path_absolute >path_begin %path_end
|
||||
| path_rootless >path_begin %path_end
|
||||
| path_empty >path_begin %path_end
|
||||
) >hier_begin %hier_end;
|
||||
|
||||
uri = scheme ':' hier_part ('?' query)? ('#' fragment);
|
||||
|
||||
relative_part =
|
||||
'//' authority path_abempty >path_begin %path_end
|
||||
| path_absolute >path_begin %path_end
|
||||
| path_noscheme >path_begin %path_end
|
||||
| path_empty >path_begin %path_end
|
||||
;
|
||||
|
||||
relative_ref = relative_part ('?' query)? ('#' fragment);
|
||||
|
||||
uri_reference = uri | relative_ref;
|
||||
|
||||
absolute_uri = scheme ':' hier_part ('?' query)?;
|
||||
|
||||
URI := (
|
||||
absolute_uri | uri_reference
|
||||
)
|
||||
impl := URI
|
||||
%success
|
||||
$!failure
|
||||
$trace;
|
||||
|
Loading…
Reference in New Issue
Block a user