diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll index ed9ca5ce1c1a..9f8c066f4244 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/ATMConfig.qll @@ -7,6 +7,7 @@ private import javascript as JS import EndpointTypes import EndpointCharacteristics as EndpointCharacteristics +import AdaptiveThreatModeling::ATM::ResultsInfo as AtmResultsInfo /** * EXPERIMENTAL. This API may change in the future. @@ -29,10 +30,23 @@ import EndpointCharacteristics as EndpointCharacteristics * `isAdditionalFlowStep` with a more generalised definition of additional edges. See * `NosqlInjectionATM.qll` for an example of doing this. */ -abstract class AtmConfig extends string { +abstract class AtmConfig extends JS::TaintTracking::Configuration { bindingset[this] AtmConfig() { any() } + /** + * Holds if `source` is a relevant taint source. When sources are not boosted, `isSource` is equivalent to + * `isKnownSource` (i.e there are no "effective" sources to be classified by an ML model). + */ + override predicate isSource(JS::DataFlow::Node source) { this.isKnownSource(source) } + + /** + * Holds if `sink` is a known taint sink or an "effective" sink (a candidate to be classified by an ML model). + */ + override predicate isSink(JS::DataFlow::Node sink) { + this.isKnownSink(sink) or this.isEffectiveSink(sink) + } + /** * EXPERIMENTAL. This API may change in the future. * @@ -127,6 +141,17 @@ abstract class AtmConfig extends string { * A cut-off value of 1 produces all alerts including those that are likely false-positives. */ float getScoreCutoff() { result = 0.0 } + + /** + * Holds if there's an ATM alert (a flow path from `source` to `sink` with ML-determined likelihood `score`) according + * to this ML-boosted configuration, whereas the unboosted base query does not contain this source and sink + * combination. + */ + predicate getAlerts(JS::DataFlow::PathNode source, JS::DataFlow::PathNode sink, float score) { + this.hasFlowPath(source, sink) and + not AtmResultsInfo::isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and + score = AtmResultsInfo::getScoreForFlow(source.getNode(), sink.getNode()) + } } /** DEPRECATED: Alias for AtmConfig */ diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/NosqlInjectionATM.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/NosqlInjectionATM.qll index f03631bfdcff..e6d602280a47 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/NosqlInjectionATM.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/NosqlInjectionATM.qll @@ -1,6 +1,7 @@ /** * For internal use only. * + * A taint-tracking configuration for reasoning about NoSQL injection vulnerabilities. * Defines shared code used by the NoSQL injection boosted query. */ @@ -10,62 +11,19 @@ private import semmle.javascript.security.dataflow.NosqlInjectionCustomizations import AdaptiveThreatModeling class NosqlInjectionAtmConfig extends AtmConfig { - NosqlInjectionAtmConfig() { this = "NosqlInjectionATMConfig" } + NosqlInjectionAtmConfig() { this = "NosqlInjectionAtmConfig" } override predicate isKnownSource(DataFlow::Node source) { source instanceof NosqlInjection::Source or TaintedObject::isSource(source, _) } override EndpointType getASinkEndpointType() { result instanceof NosqlInjectionSinkType } -} - -/** DEPRECATED: Alias for NosqlInjectionAtmConfig */ -deprecated class NosqlInjectionATMConfig = NosqlInjectionAtmConfig; - -/** Holds if src -> trg is an additional flow step in the non-boosted NoSql injection security query. */ -predicate isBaseAdditionalFlowStep( - DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl -) { - TaintedObject::step(src, trg, inlbl, outlbl) - or - // additional flow step to track taint through NoSQL query objects - inlbl = TaintedObject::label() and - outlbl = TaintedObject::label() and - exists(NoSql::Query query, DataFlow::SourceNode queryObj | - queryObj.flowsTo(query) and - queryObj.flowsTo(trg) and - src = queryObj.getAPropertyWrite().getRhs() - ) -} - -/** - * Gets a value that is (transitively) written to `query`, where `query` is a NoSQL sink. - * - * This predicate allows us to propagate data flow through property writes and array constructors - * within a query object, enabling the security query to pick up NoSQL injection vulnerabilities - * involving more complex queries. - */ -DataFlow::Node getASubexpressionWithinQuery(DataFlow::Node query) { - any(NosqlInjectionAtmConfig cfg).isEffectiveSink(query) and - exists(DataFlow::SourceNode receiver | - receiver = [getASubexpressionWithinQuery(query), query].getALocalSource() - | - result = - [receiver.getAPropertyWrite().getRhs(), receiver.(DataFlow::ArrayCreationNode).getAnElement()] - ) -} - -/** - * A taint-tracking configuration for reasoning about NoSQL injection vulnerabilities. - * - * This is largely a copy of the taint tracking configuration for the standard NoSQL injection - * query, except additional ATM sinks have been added and the additional flow step has been - * generalised to cover the sinks predicted by ATM. - */ -class Configuration extends TaintTracking::Configuration { - Configuration() { this = "NosqlInjectionATM" } - override predicate isSource(DataFlow::Node source) { source instanceof NosqlInjection::Source } + /* + * This is largely a copy of the taint tracking configuration for the standard NoSQL injection + * query, except additional ATM sinks have been added and the additional flow step has been + * generalised to cover the sinks predicted by ATM. + */ override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel label) { TaintedObject::isSource(source, label) @@ -75,7 +33,7 @@ class Configuration extends TaintTracking::Configuration { sink.(NosqlInjection::Sink).getAFlowLabel() = label or // Allow effective sinks to have any taint label - any(NosqlInjectionAtmConfig cfg).isEffectiveSink(sink) + isEffectiveSink(sink) } override predicate isSanitizer(DataFlow::Node node) { @@ -94,7 +52,43 @@ class Configuration extends TaintTracking::Configuration { isBaseAdditionalFlowStep(src, trg, inlbl, outlbl) or // relaxed version of previous step to track taint through unmodeled NoSQL query objects - any(NosqlInjectionAtmConfig cfg).isEffectiveSink(trg) and + isEffectiveSink(trg) and src = getASubexpressionWithinQuery(trg) } + + /** Holds if src -> trg is an additional flow step in the non-boosted NoSql injection security query. */ + private predicate isBaseAdditionalFlowStep( + DataFlow::Node src, DataFlow::Node trg, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl + ) { + TaintedObject::step(src, trg, inlbl, outlbl) + or + // additional flow step to track taint through NoSQL query objects + inlbl = TaintedObject::label() and + outlbl = TaintedObject::label() and + exists(NoSql::Query query, DataFlow::SourceNode queryObj | + queryObj.flowsTo(query) and + queryObj.flowsTo(trg) and + src = queryObj.getAPropertyWrite().getRhs() + ) + } + + /** + * Gets a value that is (transitively) written to `query`, where `query` is a NoSQL sink. + * + * This predicate allows us to propagate data flow through property writes and array constructors + * within a query object, enabling the security query to pick up NoSQL injection vulnerabilities + * involving more complex queries. + */ + private DataFlow::Node getASubexpressionWithinQuery(DataFlow::Node query) { + isEffectiveSink(query) and + exists(DataFlow::SourceNode receiver | + receiver = [getASubexpressionWithinQuery(query), query].getALocalSource() + | + result = + [ + receiver.getAPropertyWrite().getRhs(), + receiver.(DataFlow::ArrayCreationNode).getAnElement() + ] + ) + } } diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll index 14821404e6d0..917e79f401e4 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll @@ -1,6 +1,7 @@ /** * For internal use only. * + * A taint-tracking configuration for reasoning about SQL injection vulnerabilities. * Defines shared code used by the SQL injection boosted query. */ @@ -9,30 +10,16 @@ import semmle.javascript.security.dataflow.SqlInjectionCustomizations import AdaptiveThreatModeling class SqlInjectionAtmConfig extends AtmConfig { - SqlInjectionAtmConfig() { this = "SqlInjectionATMConfig" } + SqlInjectionAtmConfig() { this = "SqlInjectionAtmConfig" } override predicate isKnownSource(DataFlow::Node source) { source instanceof SqlInjection::Source } override EndpointType getASinkEndpointType() { result instanceof SqlInjectionSinkType } -} - -/** DEPRECATED: Alias for SqlInjectionAtmConfig */ -deprecated class SqlInjectionATMConfig = SqlInjectionAtmConfig; - -/** - * A taint-tracking configuration for reasoning about SQL injection vulnerabilities. - * - * This is largely a copy of the taint tracking configuration for the standard SQL injection - * query, except additional sinks have been added using the sink endpoint filter. - */ -class Configuration extends TaintTracking::Configuration { - Configuration() { this = "SqlInjectionATM" } - override predicate isSource(DataFlow::Node source) { source instanceof SqlInjection::Source } - - override predicate isSink(DataFlow::Node sink) { - sink instanceof SqlInjection::Sink or any(SqlInjectionAtmConfig cfg).isEffectiveSink(sink) - } + /* + * This is largely a copy of the taint tracking configuration for the standard SQL injection + * query, except additional sinks have been added using the sink endpoint filter. + */ override predicate isSanitizer(DataFlow::Node node) { super.isSanitizer(node) or diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll index 302907820da7..55b295c72ba0 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll @@ -1,6 +1,7 @@ /** * For internal use only. * + * A taint-tracking configuration for reasoning about path injection vulnerabilities. * Defines shared code used by the path injection boosted query. */ @@ -9,32 +10,22 @@ import semmle.javascript.security.dataflow.TaintedPathCustomizations import AdaptiveThreatModeling class TaintedPathAtmConfig extends AtmConfig { - TaintedPathAtmConfig() { this = "TaintedPathATMConfig" } + TaintedPathAtmConfig() { this = "TaintedPathAtmConfig" } override predicate isKnownSource(DataFlow::Node source) { source instanceof TaintedPath::Source } override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType } -} - -/** DEPRECATED: Alias for TaintedPathAtmConfig */ -deprecated class TaintedPathATMConfig = TaintedPathAtmConfig; - -/** - * A taint-tracking configuration for reasoning about path injection vulnerabilities. - * - * This is largely a copy of the taint tracking configuration for the standard path injection - * query, except additional ATM sinks have been added to the `isSink` predicate. - */ -class Configuration extends TaintTracking::Configuration { - Configuration() { this = "TaintedPathATM" } - override predicate isSource(DataFlow::Node source) { source instanceof TaintedPath::Source } + /* + * This is largely a copy of the taint tracking configuration for the standard path injection + * query, except additional ATM sinks have been added to the `isSink` predicate. + */ override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel label) { label = sink.(TaintedPath::Sink).getAFlowLabel() or // Allow effective sinks to have any taint label - any(TaintedPathAtmConfig cfg).isEffectiveSink(sink) + isEffectiveSink(sink) } override predicate isSanitizer(DataFlow::Node node) { node instanceof TaintedPath::Sanitizer } @@ -60,7 +51,7 @@ class Configuration extends TaintTracking::Configuration { * of barrier guards, we port the barrier guards for the boosted query from the standard library to * sanitizer guards here. */ -class BarrierGuardNodeAsSanitizerGuardNode extends TaintTracking::LabeledSanitizerGuardNode { +private class BarrierGuardNodeAsSanitizerGuardNode extends TaintTracking::LabeledSanitizerGuardNode { BarrierGuardNodeAsSanitizerGuardNode() { this instanceof TaintedPath::BarrierGuardNode } override predicate sanitizes(boolean outcome, Expr e) { diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/XssATM.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/XssATM.qll index 2cc848a7ea9f..d28b669bf498 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/XssATM.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/XssATM.qll @@ -1,6 +1,7 @@ /** * For internal use only. * + * A taint-tracking configuration for reasoning about XSS vulnerabilities. * Defines shared code used by the XSS boosted query. */ @@ -9,31 +10,16 @@ private import semmle.javascript.security.dataflow.DomBasedXssCustomizations import AdaptiveThreatModeling class DomBasedXssAtmConfig extends AtmConfig { - DomBasedXssAtmConfig() { this = "DomBasedXssATMConfig" } + DomBasedXssAtmConfig() { this = "DomBasedXssAtmConfig" } override predicate isKnownSource(DataFlow::Node source) { source instanceof DomBasedXss::Source } override EndpointType getASinkEndpointType() { result instanceof XssSinkType } -} - -/** DEPRECATED: Alias for DomBasedXssAtmConfig */ -deprecated class DomBasedXssATMConfig = DomBasedXssAtmConfig; - -/** - * A taint-tracking configuration for reasoning about XSS vulnerabilities. - * - * This is largely a copy of the taint tracking configuration for the standard XSSThroughDom query, - * except additional ATM sinks have been added to the `isSink` predicate. - */ -class Configuration extends TaintTracking::Configuration { - Configuration() { this = "DomBasedXssATMConfiguration" } - override predicate isSource(DataFlow::Node source) { source instanceof DomBasedXss::Source } - - override predicate isSink(DataFlow::Node sink) { - sink instanceof DomBasedXss::Sink or - any(DomBasedXssAtmConfig cfg).isEffectiveSink(sink) - } + /* + * This is largely a copy of the taint tracking configuration for the standard XSSThroughDom query, + * except additional ATM sinks have been added to the `isSink` predicate. + */ override predicate isSanitizer(DataFlow::Node node) { super.isSanitizer(node) or diff --git a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.qll b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.qll index 73628c744cfe..325809e085fd 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/extraction/ExtractEndpointDataTraining.qll @@ -206,13 +206,14 @@ query predicate reformattedTrainingEndpoints( * TODO: Delete this once we are no longer surfacing `hasFlowFromSource`. */ DataFlow::Configuration getDataFlowCfg(Query query) { - query instanceof NosqlInjectionQuery and result instanceof NosqlInjectionAtm::Configuration + query instanceof NosqlInjectionQuery and + result instanceof NosqlInjectionAtm::NosqlInjectionAtmConfig or - query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::Configuration + query instanceof SqlInjectionQuery and result instanceof SqlInjectionAtm::SqlInjectionAtmConfig or - query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::Configuration + query instanceof TaintedPathQuery and result instanceof TaintedPathAtm::TaintedPathAtmConfig or - query instanceof XssQuery and result instanceof XssAtm::Configuration + query instanceof XssQuery and result instanceof XssAtm::DomBasedXssAtmConfig } // TODO: Delete this once we are no longer surfacing `hasFlowFromSource`. diff --git a/javascript/ql/experimental/adaptivethreatmodeling/src/NosqlInjectionATM.ql b/javascript/ql/experimental/adaptivethreatmodeling/src/NosqlInjectionATM.ql index e35653fb96ac..8ec315c3d9c4 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/src/NosqlInjectionATM.ql +++ b/javascript/ql/experimental/adaptivethreatmodeling/src/NosqlInjectionATM.ql @@ -17,11 +17,8 @@ import ATM::ResultsInfo import DataFlow::PathGraph import experimental.adaptivethreatmodeling.NosqlInjectionATM -from DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score -where - cfg.hasFlowPath(source, sink) and - not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and - score = getScoreForFlow(source.getNode(), sink.getNode()) +from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score +where cfg.getAlerts(source, sink, score) select sink.getNode(), source, sink, "(Experimental) This may be a database query that depends on $@. Identified using machine learning.", source.getNode(), "a user-provided value", score diff --git a/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql b/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql index b58dd9f46094..169215657070 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql +++ b/javascript/ql/experimental/adaptivethreatmodeling/src/SqlInjectionATM.ql @@ -17,11 +17,8 @@ import experimental.adaptivethreatmodeling.SqlInjectionATM import ATM::ResultsInfo import DataFlow::PathGraph -from DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score -where - cfg.hasFlowPath(source, sink) and - not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and - score = getScoreForFlow(source.getNode(), sink.getNode()) +from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score +where cfg.getAlerts(source, sink, score) select sink.getNode(), source, sink, "(Experimental) This may be a database query that depends on $@. Identified using machine learning.", source.getNode(), "a user-provided value", score diff --git a/javascript/ql/experimental/adaptivethreatmodeling/src/TaintedPathATM.ql b/javascript/ql/experimental/adaptivethreatmodeling/src/TaintedPathATM.ql index 7e637687d75d..e4e4db0bf321 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/src/TaintedPathATM.ql +++ b/javascript/ql/experimental/adaptivethreatmodeling/src/TaintedPathATM.ql @@ -21,11 +21,8 @@ import ATM::ResultsInfo import DataFlow::PathGraph import experimental.adaptivethreatmodeling.TaintedPathATM -from DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score -where - cfg.hasFlowPath(source, sink) and - not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and - score = getScoreForFlow(source.getNode(), sink.getNode()) +from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score +where cfg.getAlerts(source, sink, score) select sink.getNode(), source, sink, "(Experimental) This may be a path that depends on $@. Identified using machine learning.", source.getNode(), "a user-provided value", score diff --git a/javascript/ql/experimental/adaptivethreatmodeling/src/XssATM.ql b/javascript/ql/experimental/adaptivethreatmodeling/src/XssATM.ql index d0e98c1cd54e..deac86922ab6 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/src/XssATM.ql +++ b/javascript/ql/experimental/adaptivethreatmodeling/src/XssATM.ql @@ -18,11 +18,8 @@ import ATM::ResultsInfo import DataFlow::PathGraph import experimental.adaptivethreatmodeling.XssATM -from DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score -where - cfg.hasFlowPath(source, sink) and - not isFlowLikelyInBaseQuery(source.getNode(), sink.getNode()) and - score = getScoreForFlow(source.getNode(), sink.getNode()) +from AtmConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, float score +where cfg.getAlerts(source, sink, score) select sink.getNode(), source, sink, "(Experimental) This may be a cross-site scripting vulnerability due to $@. Identified using machine learning.", source.getNode(), "a user-provided value", score