Skip to content

Commit 728fc0f

Browse files
committed
Fix postgres_fdw to check shippability of sort clauses properly.
postgres_fdw would push ORDER BY clauses to the remote side without verifying that the sort operator is safe to ship. Moreover, it failed to print a suitable USING clause if the sort operator isn't default for the sort expression's type. The net result of this is that the remote sort might not have anywhere near the semantics we expect, which'd be disastrous for locally-performed merge joins in particular. We addressed similar issues in the context of ORDER BY within an aggregate function call in commit 7012b13, but failed to notice that query-level ORDER BY was broken. Thus, much of the necessary logic already existed, but it requires refactoring to be usable in both cases. Back-patch to all supported branches. In HEAD only, remove the core code's copy of find_em_expr_for_rel, which is no longer used and really should never have been pushed into equivclass.c in the first place. Ronan Dunklau, per report from David Rowley; reviews by David Rowley, Ranier Vilela, and myself Discussion: https://postgr.es/m/CAApHDvr4OeC2DBVY--zVP83-K=bYrTD7F8SZDhN4g+pj2f2S-A@mail.gmail.com
1 parent a6bd1f0 commit 728fc0f

File tree

5 files changed

+181
-73
lines changed

5 files changed

+181
-73
lines changed

contrib/postgres_fdw/deparse.c

Lines changed: 114 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "catalog/pg_collation.h"
4343
#include "catalog/pg_namespace.h"
4444
#include "catalog/pg_operator.h"
45+
#include "catalog/pg_opfamily.h"
4546
#include "catalog/pg_proc.h"
4647
#include "catalog/pg_type.h"
4748
#include "commands/defrem.h"
@@ -175,6 +176,8 @@ static void deparseRangeTblRef(StringInfo buf, PlannerInfo *root,
175176
List **params_list);
176177
static void deparseAggref(Aggref *node, deparse_expr_cxt *context);
177178
static void appendGroupByClause(List *tlist, deparse_expr_cxt *context);
179+
static void appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first,
180+
deparse_expr_cxt *context);
178181
static void appendAggOrderBy(List *orderList, List *targetList,
179182
deparse_expr_cxt *context);
180183
static void appendFunctionName(Oid funcid, deparse_expr_cxt *context);
@@ -901,6 +904,33 @@ is_foreign_param(PlannerInfo *root,
901904
return false;
902905
}
903906

907+
/*
908+
* Returns true if it's safe to push down the sort expression described by
909+
* 'pathkey' to the foreign server.
910+
*/
911+
bool
912+
is_foreign_pathkey(PlannerInfo *root,
913+
RelOptInfo *baserel,
914+
PathKey *pathkey)
915+
{
916+
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
917+
PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
918+
919+
/*
920+
* is_foreign_expr would detect volatile expressions as well, but checking
921+
* ec_has_volatile here saves some cycles.
922+
*/
923+
if (pathkey_ec->ec_has_volatile)
924+
return false;
925+
926+
/* can't push down the sort if the pathkey's opfamily is not shippable */
927+
if (!is_shippable(pathkey->pk_opfamily, OperatorFamilyRelationId, fpinfo))
928+
return false;
929+
930+
/* can push if a suitable EC member exists */
931+
return (find_em_for_rel(root, pathkey_ec, baserel) != NULL);
932+
}
933+
904934
/*
905935
* Convert type OID + typmod info into a type name we can ship to the remote
906936
* server. Someplace else had better have verified that this type name is
@@ -2910,44 +2940,59 @@ appendAggOrderBy(List *orderList, List *targetList, deparse_expr_cxt *context)
29102940
{
29112941
SortGroupClause *srt = (SortGroupClause *) lfirst(lc);
29122942
Node *sortexpr;
2913-
Oid sortcoltype;
2914-
TypeCacheEntry *typentry;
29152943

29162944
if (!first)
29172945
appendStringInfoString(buf, ", ");
29182946
first = false;
29192947

2948+
/* Deparse the sort expression proper. */
29202949
sortexpr = deparseSortGroupClause(srt->tleSortGroupRef, targetList,
29212950
false, context);
2922-
sortcoltype = exprType(sortexpr);
2923-
/* See whether operator is default < or > for datatype */
2924-
typentry = lookup_type_cache(sortcoltype,
2925-
TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
2926-
if (srt->sortop == typentry->lt_opr)
2927-
appendStringInfoString(buf, " ASC");
2928-
else if (srt->sortop == typentry->gt_opr)
2929-
appendStringInfoString(buf, " DESC");
2930-
else
2931-
{
2932-
HeapTuple opertup;
2933-
Form_pg_operator operform;
2934-
2935-
appendStringInfoString(buf, " USING ");
2936-
2937-
/* Append operator name. */
2938-
opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(srt->sortop));
2939-
if (!HeapTupleIsValid(opertup))
2940-
elog(ERROR, "cache lookup failed for operator %u", srt->sortop);
2941-
operform = (Form_pg_operator) GETSTRUCT(opertup);
2942-
deparseOperatorName(buf, operform);
2943-
ReleaseSysCache(opertup);
2944-
}
2951+
/* Add decoration as needed. */
2952+
appendOrderBySuffix(srt->sortop, exprType(sortexpr), srt->nulls_first,
2953+
context);
2954+
}
2955+
}
29452956

2946-
if (srt->nulls_first)
2947-
appendStringInfoString(buf, " NULLS FIRST");
2948-
else
2949-
appendStringInfoString(buf, " NULLS LAST");
2957+
/*
2958+
* Append the ASC, DESC, USING <OPERATOR> and NULLS FIRST / NULLS LAST parts
2959+
* of an ORDER BY clause.
2960+
*/
2961+
static void
2962+
appendOrderBySuffix(Oid sortop, Oid sortcoltype, bool nulls_first,
2963+
deparse_expr_cxt *context)
2964+
{
2965+
StringInfo buf = context->buf;
2966+
TypeCacheEntry *typentry;
2967+
2968+
/* See whether operator is default < or > for sort expr's datatype. */
2969+
typentry = lookup_type_cache(sortcoltype,
2970+
TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
2971+
2972+
if (sortop == typentry->lt_opr)
2973+
appendStringInfoString(buf, " ASC");
2974+
else if (sortop == typentry->gt_opr)
2975+
appendStringInfoString(buf, " DESC");
2976+
else
2977+
{
2978+
HeapTuple opertup;
2979+
Form_pg_operator operform;
2980+
2981+
appendStringInfoString(buf, " USING ");
2982+
2983+
/* Append operator name. */
2984+
opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(sortop));
2985+
if (!HeapTupleIsValid(opertup))
2986+
elog(ERROR, "cache lookup failed for operator %u", sortop);
2987+
operform = (Form_pg_operator) GETSTRUCT(opertup);
2988+
deparseOperatorName(buf, operform);
2989+
ReleaseSysCache(opertup);
29502990
}
2991+
2992+
if (nulls_first)
2993+
appendStringInfoString(buf, " NULLS FIRST");
2994+
else
2995+
appendStringInfoString(buf, " NULLS LAST");
29512996
}
29522997

29532998
/*
@@ -3030,17 +3075,17 @@ appendGroupByClause(List *tlist, deparse_expr_cxt *context)
30303075
}
30313076

30323077
/*
3033-
* Deparse ORDER BY clause according to the given pathkeys for given base
3034-
* relation. From given pathkeys expressions belonging entirely to the given
3035-
* base relation are obtained and deparsed.
3078+
* Deparse ORDER BY clause defined by the given pathkeys.
3079+
*
3080+
* We find a suitable pathkey expression (some earlier step
3081+
* should have verified that there is one) and deparse it.
30363082
*/
30373083
static void
30383084
appendOrderByClause(List *pathkeys, deparse_expr_cxt *context)
30393085
{
30403086
ListCell *lcell;
30413087
int nestlevel;
3042-
char *delim = " ";
3043-
RelOptInfo *baserel = context->scanrel;
3088+
const char *delim = " ";
30443089
StringInfo buf = context->buf;
30453090

30463091
/* Make sure any constants in the exprs are printed portably */
@@ -3050,22 +3095,47 @@ appendOrderByClause(List *pathkeys, deparse_expr_cxt *context)
30503095
foreach(lcell, pathkeys)
30513096
{
30523097
PathKey *pathkey = lfirst(lcell);
3098+
EquivalenceMember *em;
30533099
Expr *em_expr;
3100+
Oid oprid;
30543101

3055-
em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel);
3056-
Assert(em_expr != NULL);
3102+
em = find_em_for_rel(context->root,
3103+
pathkey->pk_eclass,
3104+
context->scanrel);
3105+
3106+
/*
3107+
* We don't expect any error here; it would mean that shippability
3108+
* wasn't verified earlier. For the same reason, we don't recheck
3109+
* shippability of the sort operator.
3110+
*/
3111+
if (em == NULL)
3112+
elog(ERROR, "could not find pathkey item to sort");
3113+
3114+
em_expr = em->em_expr;
3115+
3116+
/*
3117+
* Lookup the operator corresponding to the strategy in the opclass.
3118+
* The datatype used by the opfamily is not necessarily the same as
3119+
* the expression type (for array types for example).
3120+
*/
3121+
oprid = get_opfamily_member(pathkey->pk_opfamily,
3122+
em->em_datatype,
3123+
em->em_datatype,
3124+
pathkey->pk_strategy);
3125+
if (!OidIsValid(oprid))
3126+
elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
3127+
pathkey->pk_strategy, em->em_datatype, em->em_datatype,
3128+
pathkey->pk_opfamily);
30573129

30583130
appendStringInfoString(buf, delim);
30593131
deparseExpr(em_expr, context);
3060-
if (pathkey->pk_strategy == BTLessStrategyNumber)
3061-
appendStringInfoString(buf, " ASC");
3062-
else
3063-
appendStringInfoString(buf, " DESC");
30643132

3065-
if (pathkey->pk_nulls_first)
3066-
appendStringInfoString(buf, " NULLS FIRST");
3067-
else
3068-
appendStringInfoString(buf, " NULLS LAST");
3133+
/*
3134+
* Here we need to use the expression's actual type to discover
3135+
* whether the desired operator will be the default or not.
3136+
*/
3137+
appendOrderBySuffix(oprid, exprType((Node *) em_expr),
3138+
pathkey->pk_nulls_first, context);
30693139

30703140
delim = ", ";
30713141
}

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3252,6 +3252,19 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
32523252
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
32533253
(6 rows)
32543254

3255+
-- This should not be pushed either.
3256+
explain (verbose, costs off)
3257+
select * from ft2 order by c1 using operator(public.<^);
3258+
QUERY PLAN
3259+
-------------------------------------------------------------------------------
3260+
Sort
3261+
Output: c1, c2, c3, c4, c5, c6, c7, c8
3262+
Sort Key: ft2.c1 USING <^
3263+
-> Foreign Scan on public.ft2
3264+
Output: c1, c2, c3, c4, c5, c6, c7, c8
3265+
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
3266+
(6 rows)
3267+
32553268
-- Add into extension
32563269
alter extension postgres_fdw add operator class my_op_class using btree;
32573270
alter extension postgres_fdw add function my_op_cmp(a int, b int);
@@ -3277,6 +3290,16 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
32773290
{6,16,26,36,46,56,66,76,86,96}
32783291
(1 row)
32793292

3293+
-- This should be pushed too.
3294+
explain (verbose, costs off)
3295+
select * from ft2 order by c1 using operator(public.<^);
3296+
QUERY PLAN
3297+
-----------------------------------------------------------------------------------------------------------------------------
3298+
Foreign Scan on public.ft2
3299+
Output: c1, c2, c3, c4, c5, c6, c7, c8
3300+
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST
3301+
(3 rows)
3302+
32803303
-- Remove from extension
32813304
alter extension postgres_fdw drop operator class my_op_class using btree;
32823305
alter extension postgres_fdw drop function my_op_cmp(a int, b int);

contrib/postgres_fdw/postgres_fdw.c

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "access/htup_details.h"
1818
#include "access/sysattr.h"
1919
#include "catalog/pg_class.h"
20+
#include "catalog/pg_opfamily.h"
2021
#include "commands/defrem.h"
2122
#include "commands/explain.h"
2223
#include "commands/vacuum.h"
@@ -788,22 +789,15 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
788789
foreach(lc, root->query_pathkeys)
789790
{
790791
PathKey *pathkey = (PathKey *) lfirst(lc);
791-
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
792-
Expr *em_expr;
793792

794793
/*
795794
* The planner and executor don't have any clever strategy for
796795
* taking data sorted by a prefix of the query's pathkeys and
797796
* getting it to be sorted by all of those pathkeys. We'll just
798797
* end up resorting the entire data set. So, unless we can push
799798
* down all of the query pathkeys, forget it.
800-
*
801-
* is_foreign_expr would detect volatile expressions as well, but
802-
* checking ec_has_volatile here saves some cycles.
803799
*/
804-
if (pathkey_ec->ec_has_volatile ||
805-
!(em_expr = find_em_expr_for_rel(pathkey_ec, rel)) ||
806-
!is_foreign_expr(root, rel, em_expr))
800+
if (!is_foreign_pathkey(root, rel, pathkey))
807801
{
808802
query_pathkeys_ok = false;
809803
break;
@@ -847,16 +841,19 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
847841
foreach(lc, useful_eclass_list)
848842
{
849843
EquivalenceClass *cur_ec = lfirst(lc);
850-
Expr *em_expr;
851844
PathKey *pathkey;
852845

853846
/* If redundant with what we did above, skip it. */
854847
if (cur_ec == query_ec)
855848
continue;
856849

850+
/* Can't push down the sort if the EC's opfamily is not shippable. */
851+
if (!is_shippable(linitial_oid(cur_ec->ec_opfamilies),
852+
OperatorFamilyRelationId, fpinfo))
853+
continue;
854+
857855
/* If no pushable expression for this rel, skip it. */
858-
em_expr = find_em_expr_for_rel(cur_ec, rel);
859-
if (em_expr == NULL || !is_foreign_expr(root, rel, em_expr))
856+
if (find_em_for_rel(root, cur_ec, rel) == NULL)
860857
continue;
861858

862859
/* Looks like we can generate a pathkey, so let's do it. */
@@ -5247,30 +5244,35 @@ conversion_error_callback(void *arg)
52475244
}
52485245

52495246
/*
5250-
* Find an equivalence class member expression, all of whose Vars, come from
5251-
* the indicated relation.
5247+
* Given an EquivalenceClass and a foreign relation, find an EC member
5248+
* that can be used to sort the relation remotely according to a pathkey
5249+
* using this EC.
5250+
*
5251+
* If there is more than one suitable candidate, return an arbitrary
5252+
* one of them. If there is none, return NULL.
5253+
*
5254+
* This checks that the EC member expression uses only Vars from the given
5255+
* rel and is shippable. Caller must separately verify that the pathkey's
5256+
* ordering operator is shippable.
52525257
*/
5253-
extern Expr *
5254-
find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
5258+
EquivalenceMember *
5259+
find_em_for_rel(PlannerInfo *root, EquivalenceClass *ec, RelOptInfo *rel)
52555260
{
5256-
ListCell *lc_em;
5261+
ListCell *lc;
52575262

5258-
foreach(lc_em, ec->ec_members)
5263+
foreach(lc, ec->ec_members)
52595264
{
5260-
EquivalenceMember *em = lfirst(lc_em);
5265+
EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
52615266

5267+
/*
5268+
* Note we require !bms_is_empty, else we'd accept constant
5269+
* expressions which are not suitable for the purpose.
5270+
*/
52625271
if (bms_is_subset(em->em_relids, rel->relids) &&
5263-
!bms_is_empty(em->em_relids))
5264-
{
5265-
/*
5266-
* If there is more than one equivalence member whose Vars are
5267-
* taken entirely from this relation, we'll be content to choose
5268-
* any one of those.
5269-
*/
5270-
return em->em_expr;
5271-
}
5272+
!bms_is_empty(em->em_relids) &&
5273+
is_foreign_expr(root, rel, em->em_expr))
5274+
return em;
52725275
}
52735276

5274-
/* We didn't find any suitable equivalence class expression */
52755277
return NULL;
52765278
}

contrib/postgres_fdw/postgres_fdw.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ extern bool is_foreign_expr(PlannerInfo *root,
143143
extern bool is_foreign_param(PlannerInfo *root,
144144
RelOptInfo *baserel,
145145
Expr *expr);
146+
extern bool is_foreign_pathkey(PlannerInfo *root,
147+
RelOptInfo *baserel,
148+
PathKey *pathkey);
146149
extern void deparseInsertSql(StringInfo buf, PlannerInfo *root,
147150
Index rtindex, Relation rel,
148151
List *targetAttrs, bool doNothing, List *returningList,
@@ -173,7 +176,9 @@ extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
173176
extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
174177
List **retrieved_attrs);
175178
extern void deparseStringLiteral(StringInfo buf, const char *val);
176-
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
179+
extern EquivalenceMember *find_em_for_rel(PlannerInfo *root,
180+
EquivalenceClass *ec,
181+
RelOptInfo *rel);
177182
extern List *build_tlist_to_deparse(RelOptInfo *foreignrel);
178183
extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
179184
RelOptInfo *foreignrel, List *tlist,

0 commit comments

Comments
 (0)