From 4b4cf6049d658dcc93b17c121a6bd8fea01ce42a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 21 Mar 2019 00:12:09 +0000 Subject: [PATCH 1/7] bpo-36256: Fix bug in parsermodule when parsing if statements In the parser module, when validating nodes before starting the parsing with to create a ST in "parser_newstobject" there is a problem that appears when two arcs in the same DFA state has transitions with labels with the same type. For example, the DFA for if_stmt has a state with two labels with the same type: "elif" and "else" (type NAME). The algorithm tries one by one the arcs until the label that starts the arc transition has a label with the same type of the current child label we are triying to accept. In this case, the arc for "elif" comes before the arc for "else"and passes this test (because the current child label is "else" and has the same type as "elif"). This lead to expecting a namedexpr_test (305) instead of a colon (11). The solution is to compare also the string representation (in case there is one) of the labels to see if the transition that we have is the correct one. --- Lib/test/test_parser.py | 4 ++++ Modules/parsermodule.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py index 5548a871c0244f..bfa0a5a34e2d78 100644 --- a/Lib/test/test_parser.py +++ b/Lib/test/test_parser.py @@ -319,6 +319,10 @@ def test_try_stmt(self): self.check_suite("try: pass\nexcept: pass\nelse: pass\n" "finally: pass\n") + def test_if_stmt(self): + self.check_suite("if True:\n pass\nelse:\n pass\n") + self.check_suite("if True:\n pass\nelif True:\n pass\nelse:\n pass\n") + def test_position(self): # An absolutely minimal test of position information. Better # tests would be a big project. diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index b0a749a3bc17b6..73e813c5036b9f 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -675,7 +675,11 @@ validate_node(node *tree) for (arc = 0; arc < dfa_state->s_narcs; ++arc) { short a_label = dfa_state->s_arc[arc].a_lbl; assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); - if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) { + char* label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; + if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) + && ( (ch->n_str == NULL ) || (label_str == NULL) + || (strcmp(ch->n_str, label_str) == 0)) + ) { /* The child is acceptable; if non-terminal, validate it recursively. */ if (ISNONTERMINAL(ch_type) && !validate_node(ch)) return 0; From ca3e88dca0fd4330240b3ebb0cad49153c914a60 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 21 Mar 2019 00:24:24 +0000 Subject: [PATCH 2/7] Add News entry --- .../Core and Builtins/2019-03-21-00-24-18.bpo-12477.OZHa0t.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-03-21-00-24-18.bpo-12477.OZHa0t.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-21-00-24-18.bpo-12477.OZHa0t.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-21-00-24-18.bpo-12477.OZHa0t.rst new file mode 100644 index 00000000000000..aada7f912a6cff --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-21-00-24-18.bpo-12477.OZHa0t.rst @@ -0,0 +1,2 @@ +Fix bug in parsermodule when parsing a state in a DFA that has two or more +arcs with labels of the same type. Patch by Pablo Galindo. From a902d9605da87edbbbdd76de3b420eebee8b5875 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 21 Mar 2019 10:06:38 +0000 Subject: [PATCH 3/7] fixup! Add News entry --- Modules/parsermodule.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index 73e813c5036b9f..2f746435959c1a 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -675,6 +675,7 @@ validate_node(node *tree) for (arc = 0; arc < dfa_state->s_narcs; ++arc) { short a_label = dfa_state->s_arc[arc].a_lbl; assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); + char* label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) && ( (ch->n_str == NULL ) || (label_str == NULL) @@ -692,17 +693,24 @@ validate_node(node *tree) /* What would this state have accepted? */ { short a_label = dfa_state->s_arc->a_lbl; - int next_type; if (!a_label) /* Wouldn't accept any more children */ goto illegal_num_children; - next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type; - if (ISNONTERMINAL(next_type)) + int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type; + const char* expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; + + if (ISNONTERMINAL(next_type)) { PyErr_Format(parser_error, "Expected node type %d, got %d.", next_type, ch_type); - else + } + else if (expected_str) { + PyErr_Format(parser_error, "Illegal terminal: expected '%s'.", + expected_str); + } + else { PyErr_Format(parser_error, "Illegal terminal: expected %s.", _PyParser_TokenNames[next_type]); + } return 0; } From c2aeb32d5731cbc61ba1095902e91c63a83c5aba Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 21 Mar 2019 21:24:49 +0000 Subject: [PATCH 4/7] Update Modules/parsermodule.c Co-Authored-By: pablogsal --- Modules/parsermodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index 2f746435959c1a..ba1393f06ca899 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -703,7 +703,7 @@ validate_node(node *tree) PyErr_Format(parser_error, "Expected node type %d, got %d.", next_type, ch_type); } - else if (expected_str) { + else if (expected_str != NULL) { PyErr_Format(parser_error, "Illegal terminal: expected '%s'.", expected_str); } From eb14255df1e56c9ac60436e51ba4ff0553b98164 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 21 Mar 2019 21:25:14 +0000 Subject: [PATCH 5/7] Update Modules/parsermodule.c Co-Authored-By: pablogsal --- Modules/parsermodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index ba1393f06ca899..f3e56b2c6d34b3 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -697,7 +697,7 @@ validate_node(node *tree) goto illegal_num_children; int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type; - const char* expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; + const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if (ISNONTERMINAL(next_type)) { PyErr_Format(parser_error, "Expected node type %d, got %d.", From a9b67d4b3599cd077756a85cd38e550614106a6a Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Thu, 21 Mar 2019 21:25:27 +0000 Subject: [PATCH 6/7] Update Modules/parsermodule.c Co-Authored-By: pablogsal --- Modules/parsermodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index f3e56b2c6d34b3..1280b53ef7bbf8 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -676,7 +676,7 @@ validate_node(node *tree) short a_label = dfa_state->s_arc[arc].a_lbl; assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); - char* label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; + const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) && ( (ch->n_str == NULL ) || (label_str == NULL) || (strcmp(ch->n_str, label_str) == 0)) From db148c089a652f97597f0a23fd16f7b15c141c8b Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Thu, 21 Mar 2019 21:49:33 +0000 Subject: [PATCH 7/7] Update Modules/parsermodule.c Co-Authored-By: pablogsal --- Modules/parsermodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index 1280b53ef7bbf8..fd330b5fbe0214 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -678,7 +678,7 @@ validate_node(node *tree) const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) - && ( (ch->n_str == NULL ) || (label_str == NULL) + && ((ch->n_str == NULL) || (label_str == NULL) || (strcmp(ch->n_str, label_str) == 0)) ) { /* The child is acceptable; if non-terminal, validate it recursively. */