From fbb59b7184964b814e315ee1d8946199ccedc84e Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado
Date: Sat, 6 Dec 2025 00:40:16 +0000
Subject: [PATCH] gh-142236: Improve error location for missing comma in string concatenations

When a multi-line string concatenation is missing a comma before the
next argument, the error previously pointed to the first string in the
concatenation, which was confusing because the actual fix needed to be
applied after the last string.

This change adds a helper function that detects multi-line expressions
and adjusts the error position to point to the last line, making it
immediately clear where the comma should be added.
---
 Grammar/python.gram                           |  3 +-
 Lib/test/test_syntax.py                       | 14 +++++++++
 ...-12-06-00-38-37.gh-issue-142236.m3EF9E.rst |  3 ++
 Parser/action_helpers.c                       | 29 +++++++++++++++++++
 Parser/parser.c                               |  2 +-
 Parser/pegen.h                                |  1 +
 6 files changed, 49 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-38-37.gh-issue-142236.m3EF9E.rst

diff --git a/Grammar/python.gram b/Grammar/python.gram
index 7ae00c6f005e7e..110136af81b596 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -1251,8 +1251,7 @@ invalid_expression:
     # !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
     # Soft keywords need to also be ignored because they can be parsed as NAME NAME
     | !(NAME STRING | SOFT_KEYWORD) a=disjunction b=expression_without_invalid {
-        _PyPegen_check_legacy_stmt(p, a) ? NULL : p->tokens[p->mark-1]->level == 0 ? NULL :
-        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
+        _PyPegen_raise_error_for_missing_comma(p, a, b) }
     | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
     | a=disjunction 'if' b=disjunction 'else' !expression {
         RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("expected expression after 'else', but statement is given") }
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index ade8f273a1e088..93f0b98de71d81 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -3336,6 +3336,20 @@ def test_multiline_compiler_error_points_to_the_end(self):
             lineno=3
         )
 
+    def test_multiline_string_concat_missing_comma_points_to_last_string(self):
+        # gh-142236: For multi-line string concatenations with a missing comma,
+        # the error should point to the last string, not the first.
+        self._check_error(
+            "print(\n"
+            ' "line1"\n'
+            ' "line2"\n'
+            ' "line3"\n'
+            " x=1\n"
+            ")",
+            "Perhaps you forgot a comma",
+            lineno=4,  # Points to "line3", the last string
+        )
+
     @support.cpython_only
     def test_syntax_error_on_deeply_nested_blocks(self):
         # This raises a SyntaxError, it used to raise a SystemError. Context
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-38-37.gh-issue-142236.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-38-37.gh-issue-142236.m3EF9E.rst
new file mode 100644
index 00000000000000..a8d37b49de7160
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-38-37.gh-issue-142236.m3EF9E.rst
@@ -0,0 +1,3 @@
+Improve the "Perhaps you forgot a comma?" syntax error for multi-line string
+concatenations to point to the last string instead of the first, making it
+easier to locate where the comma is missing. Patch by Pablo Galindo.
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index b7a5b9d5e307b1..50856686335a14 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -947,6 +947,35 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
     return 0;
 }
 
+void *
+_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b)
+{
+    // Don't raise for legacy statements like "print x" or "exec x"
+    if (_PyPegen_check_legacy_stmt(p, a)) {
+        return NULL;
+    }
+    // Only raise inside parentheses/brackets (level > 0)
+    if (p->tokens[p->mark - 1]->level == 0) {
+        return NULL;
+    }
+    // For multi-line expressions (like string concatenations), point to the
+    // last line instead of the first for a more helpful error message.
+    // Use a->col_offset as the starting column since all strings in the
+    // concatenation typically share the same indentation.
+    if (a->end_lineno > a->lineno) {
+        return RAISE_ERROR_KNOWN_LOCATION(
+            p, PyExc_SyntaxError, a->end_lineno, a->col_offset,
+            a->end_lineno, a->end_col_offset,
+            "invalid syntax. Perhaps you forgot a comma?"
+        );
+    }
+    return RAISE_ERROR_KNOWN_LOCATION(
+        p, PyExc_SyntaxError, a->lineno, a->col_offset,
+        b->end_lineno, b->end_col_offset,
+        "invalid syntax. Perhaps you forgot a comma?"
+    );
+}
+
 static ResultTokenWithMetadata *
 result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
 {
diff --git a/Parser/parser.c b/Parser/parser.c
index 648b3702d8ff71..09bfb5725a2ec3 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -21445,7 +21445,7 @@ invalid_expression_rule(Parser *p)
         )
         {
             D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!(NAME STRING | SOFT_KEYWORD) disjunction expression_without_invalid"));
-            _res = _PyPegen_check_legacy_stmt ( p , a ) ? NULL : p -> tokens [p -> mark - 1] -> level == 0 ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE ( a , b , "invalid syntax. Perhaps you forgot a comma?" );
+            _res = _PyPegen_raise_error_for_missing_comma ( p , a , b );
             if (_res == NULL && PyErr_Occurred()) {
                 p->error_indicator = 1;
                 p->level--;
diff --git a/Parser/pegen.h b/Parser/pegen.h
index b8f887608b104e..be5333eb2684ae 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -358,6 +358,7 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
+void *_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b);
 ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
 ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int, int, int,
                                                          PyArena *);