Skip to content

Improve preg_split() function ReturnType #3757

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
May 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5784175
feat improve preg_split type Extension
malsuke Dec 25, 2024
e595e1e
feat add test for varibles
malsuke Dec 25, 2024
0f52c61
feat add benevolent type to preg_split
malsuke Dec 25, 2024
4c3681d
feat new feat for flag
malsuke Dec 25, 2024
c4e76c9
feat improve for flag & non-empty-string
malsuke Dec 25, 2024
68379ac
add test for PREG_SPLIT_DELIM_CAPTURE flag
malsuke Dec 25, 2024
2031d06
add test case for nonEmptySubject
malsuke Dec 26, 2024
82e4ce6
feat add if state for nonEmptySubject
malsuke Dec 26, 2024
fd496c2
feat cleanup
malsuke Dec 26, 2024
96896ca
feat cleanup
malsuke Dec 26, 2024
6ff7b8c
feat cleanup
malsuke Dec 26, 2024
c9852d5
feat add is_int assertion
malsuke Dec 26, 2024
6d1d6e9
feat fix test
malsuke Dec 26, 2024
6549506
feat fix test
malsuke Dec 26, 2024
3d51233
fix cleanup
malsuke Dec 26, 2024
540e1b2
fix cleanup
malsuke Dec 26, 2024
d48a512
fix cleanup
malsuke Dec 26, 2024
d020927
fix cleanup loop
malsuke Dec 26, 2024
13937b8
fix __benevolent usage
malsuke Jan 14, 2025
99b2d4d
fix test
malsuke Jan 14, 2025
bd4b7d2
fix test
malsuke Jan 14, 2025
eb3e9f4
fix test
malsuke Jan 14, 2025
daa3072
fix test
malsuke Jan 14, 2025
4672297
fix coding style
malsuke Jan 14, 2025
b809edc
fix: use utils function, return point, allow numeric-string
malsuke Mar 7, 2025
c96736b
feat: add test for Error
malsuke Mar 7, 2025
de4c495
feat: migrate validation to private method
malsuke Mar 7, 2025
a305dab
fix: coding style
malsuke Mar 7, 2025
3f7bdcc
fix: coding style
malsuke Mar 7, 2025
658826a
feat: change variable name, fix: check type of limit/flag
malsuke Mar 11, 2025
ca78451
add: add test for scaler value
malsuke Mar 11, 2025
c896b1b
fix: lint
malsuke Mar 11, 2025
5949c79
feat: return union false
malsuke Mar 12, 2025
a54bd80
feat: Handle case when preg_split returns false
malsuke Apr 17, 2025
1ee1a79
fix: for using ConstantArrayTypeBuilder and UnionType
malsuke May 15, 2025
798fa23
fix: cleanup
malsuke May 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion resources/functionMap.php
Original file line number Diff line number Diff line change
Expand Up @@ -9081,7 +9081,7 @@
'preg_replace' => ['string|array|null', 'regex'=>'string|array', 'replace'=>'string|array', 'subject'=>'string|array', 'limit='=>'int', '&w_count='=>'int'],
'preg_replace_callback' => ['string|array|null', 'regex'=>'string|array', 'callback'=>'callable(array<int|string, string>):string', 'subject'=>'string|array', 'limit='=>'int', '&w_count='=>'int'],
'preg_replace_callback_array' => ['string|array|null', 'pattern'=>'array<string,callable>', 'subject'=>'string|array', 'limit='=>'int', '&w_count='=>'int'],
'preg_split' => ['list<string>|false', 'pattern'=>'string', 'subject'=>'string', 'limit='=>'?int', 'flags='=>'int'],
'preg_split' => ['list<string>|list<array{string, int<0, max>}>|false', 'pattern'=>'string', 'subject'=>'string', 'limit='=>'?int', 'flags='=>'int'],
'prev' => ['mixed', '&rw_array_arg'=>'array|object'],
'print_r' => ['string|true', 'var'=>'mixed', 'return='=>'bool'],
'printf' => ['int', 'format'=>'string', '...values='=>'__stringAndStringable|int|float|null|bool'],
Expand Down
179 changes: 169 additions & 10 deletions src/Type/Php/PregSplitDynamicReturnTypeExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,41 @@

namespace PHPStan\Type\Php;

use Nette\Utils\RegexpException;
use Nette\Utils\Strings;
use PhpParser\Node\Expr\FuncCall;
use PHPStan\Analyser\Scope;
use PHPStan\Reflection\FunctionReflection;
use PHPStan\TrinaryLogic;
use PHPStan\Type\Accessory\AccessoryArrayListType;
use PHPStan\Type\Accessory\AccessoryNonEmptyStringType;
use PHPStan\Type\Accessory\NonEmptyArrayType;
use PHPStan\Type\ArrayType;
use PHPStan\Type\BitwiseFlagHelper;
use PHPStan\Type\Constant\ConstantArrayType;
use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
use PHPStan\Type\Constant\ConstantBooleanType;
use PHPStan\Type\Constant\ConstantIntegerType;
use PHPStan\Type\Constant\ConstantStringType;
use PHPStan\Type\DynamicFunctionReturnTypeExtension;
use PHPStan\Type\ErrorType;
use PHPStan\Type\IntegerRangeType;
use PHPStan\Type\IntegerType;
use PHPStan\Type\MixedType;
use PHPStan\Type\StringType;
use PHPStan\Type\Type;
use PHPStan\Type\TypeCombinator;
use PHPStan\Type\UnionType;
use function count;
use function is_array;
use function is_int;
use function is_numeric;
use function preg_split;
use function strtolower;

final class PregSplitDynamicReturnTypeExtension implements DynamicFunctionReturnTypeExtension
{

public function __construct(
private BitwiseFlagHelper $bitwiseFlagAnalyser,
private readonly BitwiseFlagHelper $bitwiseFlagAnalyser,
)
{
}
Expand All @@ -36,17 +48,164 @@ public function isFunctionSupported(FunctionReflection $functionReflection): boo

public function getTypeFromFunctionCall(FunctionReflection $functionReflection, FuncCall $functionCall, Scope $scope): ?Type
{
$flagsArg = $functionCall->getArgs()[3] ?? null;
$args = $functionCall->getArgs();
if (count($args) < 2) {
return null;
}
$patternArg = $args[0];
$subjectArg = $args[1];
$limitArg = $args[2] ?? null;
$capturesOffset = $args[3] ?? null;
$patternType = $scope->getType($patternArg->value);
$patternConstantTypes = $patternType->getConstantStrings();
if (count($patternConstantTypes) > 0) {
foreach ($patternConstantTypes as $patternConstantType) {
if ($this->isValidPattern($patternConstantType->getValue()) === false) {
return new ErrorType();
}
}
}

$subjectType = $scope->getType($subjectArg->value);
$subjectConstantTypes = $subjectType->getConstantStrings();

$limits = [];
if ($limitArg === null) {
$limits = [-1];
} else {
$limitType = $scope->getType($limitArg->value);
if (!$this->isIntOrStringValue($limitType)) {
return new ErrorType();
}
foreach ($limitType->getConstantScalarValues() as $limit) {
if (!is_int($limit) && !is_numeric($limit)) {
return new ErrorType();
}
$limits[] = $limit;
}
}

$flags = [];
if ($capturesOffset === null) {
$flags = [0];
} else {
$flagType = $scope->getType($capturesOffset->value);
if (!$this->isIntOrStringValue($flagType)) {
return new ErrorType();
}
foreach ($flagType->getConstantScalarValues() as $flag) {
if (!is_int($flag) && !is_numeric($flag)) {
return new ErrorType();
}
$flags[] = $flag;
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By replacing it as follows, type checking within multiple Constant loops will no longer be necessary.

$flags = [];
$flagType = $scope->getType($flagArg->value);
foreach ($flagType->getConstantScalarValues() as $flag) {
    if (!is_int()) {
        return new ErrorType();
    }

    $flags[] = $flag;
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resolved 8cb3030


if ($flagsArg !== null && $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagsArg->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE')->yes()) {
$type = new ArrayType(
new IntegerType(),
new ConstantArrayType([new ConstantIntegerType(0), new ConstantIntegerType(1)], [new StringType(), IntegerRangeType::fromInterval(0, null)], [2], [], TrinaryLogic::createYes()),
if ($this->isPatternOrSubjectEmpty($patternConstantTypes, $subjectConstantTypes)) {
if ($capturesOffset !== null
&& $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($capturesOffset->value, $scope, 'PREG_SPLIT_NO_EMPTY')->yes()) {
$returnStringType = TypeCombinator::intersect(
new StringType(),
new AccessoryNonEmptyStringType(),
);
} else {
$returnStringType = new StringType();
}

$arrayTypeBuilder = ConstantArrayTypeBuilder::createEmpty();
$arrayTypeBuilder->setOffsetValueType(
new ConstantIntegerType(0),
$returnStringType,
);
$arrayTypeBuilder->setOffsetValueType(
new ConstantIntegerType(1),
IntegerRangeType::fromInterval(0, null),
);
return TypeCombinator::union(TypeCombinator::intersect($type, new AccessoryArrayListType()), new ConstantBooleanType(false));
$capturedArrayType = $arrayTypeBuilder->getArray();

$returnInternalValueType = $returnStringType;
if ($capturesOffset !== null) {
$flagState = $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($capturesOffset->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE');
if ($flagState->yes()) {
$capturedArrayListType = TypeCombinator::intersect(
new ArrayType(new IntegerType(), $capturedArrayType),
new AccessoryArrayListType(),
);

if ($subjectType->isNonEmptyString()->yes()) {
$capturedArrayListType = TypeCombinator::intersect($capturedArrayListType, new NonEmptyArrayType());
}
return TypeCombinator::union($capturedArrayListType, new ConstantBooleanType(false));
}
if ($flagState->maybe()) {
$returnInternalValueType = TypeCombinator::union(new StringType(), $capturedArrayType);
}
}

$returnListType = TypeCombinator::intersect(new ArrayType(new MixedType(), $returnInternalValueType), new AccessoryArrayListType());
if ($subjectType->isNonEmptyString()->yes()) {
$returnListType = TypeCombinator::intersect(
$returnListType,
new NonEmptyArrayType(),
);
}

return TypeCombinator::union($returnListType, new ConstantBooleanType(false));
}

return null;
$resultTypes = [];
foreach ($patternConstantTypes as $patternConstantType) {
foreach ($subjectConstantTypes as $subjectConstantType) {
foreach ($limits as $limit) {
foreach ($flags as $flag) {
$result = @preg_split($patternConstantType->getValue(), $subjectConstantType->getValue(), (int) $limit, (int) $flag);
if ($result === false) {
return new ErrorType();
}
$constantArray = ConstantArrayTypeBuilder::createEmpty();
foreach ($result as $key => $value) {
if (is_array($value)) {
$valueConstantArray = ConstantArrayTypeBuilder::createEmpty();
$valueConstantArray->setOffsetValueType(new ConstantIntegerType(0), new ConstantStringType($value[0]));
$valueConstantArray->setOffsetValueType(new ConstantIntegerType(1), new ConstantIntegerType($value[1]));
$returnInternalValueType = $valueConstantArray->getArray();
} else {
$returnInternalValueType = new ConstantStringType($value);
}
$constantArray->setOffsetValueType(new ConstantIntegerType($key), $returnInternalValueType);
}

$resultTypes[] = $constantArray->getArray();
}
}
}
}
$resultTypes[] = new ConstantBooleanType(false);
return TypeCombinator::union(...$resultTypes);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we are missing false. in the preg_match inference we decided this can get false even if all args a valid and static analysis time known, because a regex pattern might be super inefficient (or pattern based attacks might trick the regex engine into return false)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the above comment is still true and we are missing the false here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, does this mean that every possible result of preg_split includes the possibility of false, and therefore we need to add false to the union type?

I had implemented it to return an Error if preg_split returns false, as a warning.
So, does this mean I should include false in all cases, instead of returning Error?

Copy link
Contributor

@staabm staabm Mar 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return type of false at runtime is necessary because the preg_split call can fail even if we know everything IIRC.

The current "return ErrorType" could be turned into "return null" in case other rules will already report a phpstan error for the code examples.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have fixed about that in the following commit.
307cf54

Additionally, since handling for the false case is no longer necessary, I have removed if ($result === false).

}

/**
* @param ConstantStringType[] $patternConstantArray
* @param ConstantStringType[] $subjectConstantArray
*/
private function isPatternOrSubjectEmpty(array $patternConstantArray, array $subjectConstantArray): bool
{
return count($patternConstantArray) === 0 || count($subjectConstantArray) === 0;
}

private function isValidPattern(string $pattern): bool
{
try {
Strings::match('', $pattern);
} catch (RegexpException) {
return false;
}
return true;
}

private function isIntOrStringValue(Type $type): bool
{
return (new UnionType([new IntegerType(), new StringType()]))->isSuperTypeOf($type)->yes();
}

}
2 changes: 1 addition & 1 deletion tests/PHPStan/Analyser/AnalyserIntegrationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,7 @@ public function testBug7554(): void
$this->assertSame(sprintf('Parameter #1 $%s of function count expects array|Countable, list<array<int, int<0, max>|string>>|false given.', PHP_VERSION_ID < 80000 ? 'var' : 'value'), $errors[0]->getMessage());
$this->assertSame(26, $errors[0]->getLine());

$this->assertSame('Cannot access offset int<1, max> on list<array{string, int<0, max>}>|false.', $errors[1]->getMessage());
$this->assertSame('Cannot access offset int<1, max> on list<array{non-empty-string, int<0, max>}>|false.', $errors[1]->getMessage());
$this->assertSame(27, $errors[1]->getLine());
}

Expand Down
79 changes: 72 additions & 7 deletions tests/PHPStan/Analyser/nsrt/preg_split.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,77 @@

class HelloWorld
{
private const NUMERIC_STRING_1 = "1";
private const NUMERIC_STRING_NEGATIVE_1 = "-1";

public function doFoo()
{
assertType('list<string>|false', preg_split('/-/', '1-2-3'));
assertType('list<string>|false', preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY));
assertType('list<array{string, int<0, max>}>|false', preg_split('/-/', '1-2-3', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType('list<array{string, int<0, max>}>|false', preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{''}|false", preg_split('/-/', ''));
assertType("array{}|false", preg_split('/-/', '', -1, PREG_SPLIT_NO_EMPTY));
assertType("array{'1', '-', '2', '-', '3'}|false", preg_split('/ *(-) */', '1- 2-3', -1, PREG_SPLIT_DELIM_CAPTURE));
assertType("array{array{'', 0}}|false", preg_split('/-/', '', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{}|false", preg_split('/-/', '', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{'1', '2', '3'}|false", preg_split('/-/', '1-2-3'));
assertType("array{'1', '2', '3'}|false", preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY));
assertType("array{'1', '3'}|false", preg_split('/-/', '1--3', -1, PREG_SPLIT_NO_EMPTY));
assertType("array{array{'1', 0}, array{'2', 2}, array{'3', 4}}|false", preg_split('/-/', '1-2-3', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{array{'1', 0}, array{'2', 2}, array{'3', 4}}|false", preg_split('/-/', '1-2-3', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{array{'1', 0}, array{'', 2}, array{'3', 3}}|false", preg_split('/-/', '1--3', -1, PREG_SPLIT_OFFSET_CAPTURE));
assertType("array{array{'1', 0}, array{'3', 3}}|false", preg_split('/-/', '1--3', -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));

assertType("array{'1', '2', '3'}|false", preg_split('/-/', '1-2-3', self::NUMERIC_STRING_NEGATIVE_1));
assertType("array{'1', '2', '3'}|false", preg_split('/-/', '1-2-3', -1, self::NUMERIC_STRING_1));
}

public function doWithError() {
assertType('*ERROR*', preg_split('/[0-9a]', '1-2-3'));
assertType('*ERROR*', preg_split('/-/', '1-2-3', 'hogehoge'));
assertType('*ERROR*', preg_split('/-/', '1-2-3', -1, 'hogehoge'));
assertType('*ERROR*', preg_split('/-/', '1-2-3', [], self::NUMERIC_STRING_NEGATIVE_1));
assertType('*ERROR*', preg_split('/-/', '1-2-3', null, self::NUMERIC_STRING_NEGATIVE_1));
assertType('*ERROR*', preg_split('/-/', '1-2-3', -1, []));
assertType('*ERROR*', preg_split('/-/', '1-2-3', -1, null));
}

public function doWithVariables(string $pattern, string $subject, int $offset, int $flags): void
{
assertType("array{'1', '2', '3'}|array{'1-2-3'}|false", preg_split('/-/', '1-2-3', $this->generate()));
assertType("array{'1', '2', '3'}|array{'1-2-3'}|false", preg_split('/-/', '1-2-3', $this->generate(), $this->generate()));

assertType('list<array{string, int<0, max>}|string>|false', preg_split($pattern, $subject, $offset, $flags));
assertType('list<array{string, int<0, max>}|string>|false', preg_split("//", $subject, $offset, $flags));

assertType('non-empty-list<array{string, int<0, max>}|string>|false', preg_split($pattern, "1-2-3", $offset, $flags));
assertType('list<array{string, int<0, max>}|string>|false', preg_split($pattern, $subject, -1, $flags));
assertType('list<non-empty-string>|false', preg_split($pattern, $subject, $offset, PREG_SPLIT_NO_EMPTY));
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $offset, PREG_SPLIT_OFFSET_CAPTURE));
assertType("list<string>|false", preg_split($pattern, $subject, $offset, PREG_SPLIT_DELIM_CAPTURE));
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $offset, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE));
}

/**
* @return 1|'17'
*/
private function generate(): int|string {
return (rand() % 2 === 0) ? 1 : "17";
}

/**
* @param non-empty-string $nonEmptySubject
*/
public function doWithNonEmptySubject(string $pattern, string $nonEmptySubject, int $offset, int $flags): void
{
assertType('non-empty-list<string>|false', preg_split("//", $nonEmptySubject));

assertType('non-empty-list<array{string, int<0, max>}|string>|false', preg_split($pattern, $nonEmptySubject, $offset, $flags));
assertType('non-empty-list<array{string, int<0, max>}|string>|false', preg_split("//", $nonEmptySubject, $offset, $flags));

assertType('non-empty-list<array{string, int<0, max>}>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_OFFSET_CAPTURE));
assertType('non-empty-list<non-empty-string>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_NO_EMPTY));
assertType('non-empty-list<string>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_DELIM_CAPTURE));
assertType('non-empty-list<array{string, int<0, max>}>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE));
assertType('non-empty-list<array{non-empty-string, int<0, max>}>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE));
assertType('non-empty-list<non-empty-string>|false', preg_split("/-/", $nonEmptySubject, $offset, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE));
}

/**
Expand All @@ -26,16 +91,16 @@ public static function splitWithOffset($pattern, $subject, $limit = -1, $flags =
{
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, $flags | PREG_SPLIT_OFFSET_CAPTURE));
assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, PREG_SPLIT_OFFSET_CAPTURE | $flags));

assertType('list<array{string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, PREG_SPLIT_OFFSET_CAPTURE | $flags | PREG_SPLIT_NO_EMPTY));
assertType('list<array{non-empty-string, int<0, max>}>|false', preg_split($pattern, $subject, $limit, PREG_SPLIT_OFFSET_CAPTURE | $flags | PREG_SPLIT_NO_EMPTY));
}

/**
* @param string $pattern
* @param string $subject
* @param int $limit
*/
public static function dynamicFlags($pattern, $subject, $limit = -1) {
public static function dynamicFlags($pattern, $subject, $limit = -1)
{
$flags = PREG_SPLIT_OFFSET_CAPTURE;

if ($subject === '1-2-3') {
Expand Down
Loading