Skip to content

Commit e44e153

Browse files
Release build 11.37.0 [ci release]
1 parent 1e6e171 commit e44e153

34 files changed

+1212
-96
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ Sources/ContentScopeScripts/dist/
1212
test-results
1313
!Sources/ContentScopeScripts/dist/pages/.gitignore
1414

15+
# Test output files (generated during tests)
16+
injected/unit-test/fixtures/page-context/output/
17+
1518
# Local Netlify folder
1619
.netlify
1720
# VS Code user config

CHANGELOG.txt

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,2 @@
1-
- Improve logging for features (#2004)
2-
- Fix duckAi selection by checking subdomains (#2005)
3-
- Improve file annotation to include the inject name (#2003)
4-
- Add Robert to CODEOWNERS entry for page-context.js (#2012)
5-
- build(deps-dev): bump web-ext from 8.10.0 to 9.0.0 (#2007)
6-
- build(deps): bump github/codeql-action from 3 to 4 (#2009)
7-
- build(deps): bump stefanzweifel/git-auto-commit-action from 5 to 7 (#2010)
8-
- build(deps-dev): bump @typescript-eslint/eslint-plugin (#2008)
1+
- Trim links and include images (#2011)
2+
- Using gated logger for console.warn (#2013)

build/apple/contentScope.js

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6987,7 +6987,7 @@ ul.messages {
69876987
break;
69886988
case "UNKNOWN":
69896989
default:
6990-
console.warn("No known pageType");
6990+
logger.log("No known pageType");
69916991
}
69926992
if (this.currentPage) {
69936993
this.currentPage.destroy();
@@ -10268,6 +10268,20 @@ ${truncatedWarning}
1026810268
return node.nodeType === Node.ELEMENT_NODE;
1026910269
}
1027010270
function getSameOriginIframeDocument(iframe) {
10271+
const src = iframe.src;
10272+
if (iframe.hasAttribute("sandbox") && !iframe.sandbox.contains("allow-scripts")) {
10273+
return null;
10274+
}
10275+
if (src && src !== "about:blank" && src !== "") {
10276+
try {
10277+
const iframeUrl = new URL(src, window.location.href);
10278+
if (iframeUrl.origin !== window.location.origin) {
10279+
return null;
10280+
}
10281+
} catch (e) {
10282+
return null;
10283+
}
10284+
}
1027110285
try {
1027210286
const doc = iframe.contentDocument;
1027310287
if (doc && doc.documentElement) {
@@ -10303,7 +10317,7 @@ ${truncatedWarning}
1030310317
if (!isHtmlElement(node)) {
1030410318
return "";
1030510319
}
10306-
if (!checkNodeIsVisible(node) || node.matches(settings.excludeSelectors)) {
10320+
if (!checkNodeIsVisible(node) || settings.excludeSelectors && node.matches(settings.excludeSelectors)) {
1030710321
return "";
1030810322
}
1030910323
const tag = node.tagName.toLowerCase();
@@ -10335,17 +10349,22 @@ ${truncatedWarning}
1033510349
`;
1033610350
case "br":
1033710351
return `
10352+
`;
10353+
case "img":
10354+
return `
10355+
![${getAttributeOrBlank(node, "alt")}](${getAttributeOrBlank(node, "src")})
1033810356
`;
1033910357
case "ul":
10358+
case "ol":
1034010359
return `
1034110360
${children}
1034210361
`;
1034310362
case "li":
1034410363
return `
10345-
- ${children.trim()}
10364+
- ${collapseAndTrim(children)}
1034610365
`;
1034710366
case "a":
10348-
return getLinkText(node);
10367+
return getLinkText(node, children, settings);
1034910368
case "iframe": {
1035010369
if (!settings.includeIframes) {
1035110370
return children;
@@ -10370,12 +10389,20 @@ ${iframeContent}
1037010389
return children;
1037110390
}
1037210391
}
10392+
function getAttributeOrBlank(node, attr) {
10393+
const attrValue = node.getAttribute(attr) ?? "";
10394+
return attrValue.trim();
10395+
}
1037310396
function collapseAndTrim(str) {
1037410397
return collapseWhitespace(str).trim();
1037510398
}
10376-
function getLinkText(node) {
10399+
function getLinkText(node, children, settings) {
1037710400
const href = node.getAttribute("href");
10378-
return href ? `[${collapseAndTrim(node.textContent)}](${href})` : collapseWhitespace(node.textContent);
10401+
const trimmedContent = collapseAndTrim(children);
10402+
if (settings.trimBlankLinks && trimmedContent.length === 0) {
10403+
return "";
10404+
}
10405+
return href ? `[${trimmedContent}](${href})` : collapseWhitespace(children);
1037910406
}
1038010407
var _cachedContent, _cachedTimestamp, _delayedRecheckTimer;
1038110408
var PageContext = class extends ContentFeature {
@@ -10606,6 +10633,8 @@ ${iframeContent}
1060610633
const maxDepth = this.getFeatureSetting("maxDepth") || 5e3;
1060710634
let excludeSelectors = this.getFeatureSetting("excludeSelectors") || [".ad", ".sidebar", ".footer", ".nav", ".header"];
1060810635
const excludedInertElements = this.getFeatureSetting("excludedInertElements") || [
10636+
"img",
10637+
// Note: images are excluded by default for now, even though domToMarkdown can render them (this can be enabled per site in the config if needed).
1060910638
"script",
1061010639
"style",
1061110640
"link",
@@ -10623,18 +10652,26 @@ ${iframeContent}
1062310652
if (mainContent && mainContent.innerHTML.trim().length <= mainContentLength) {
1062410653
mainContent = null;
1062510654
}
10626-
const contentRoot = mainContent || document.body;
10627-
if (contentRoot) {
10628-
this.log.info("Getting main content", contentRoot);
10629-
content += domToMarkdown(contentRoot, {
10655+
let contentRoot = mainContent || document.body;
10656+
const extractContent = (root) => {
10657+
this.log.info("Getting content", root);
10658+
const result = domToMarkdown(root, {
1063010659
maxLength: upperLimit,
1063110660
maxDepth,
1063210661
includeIframes: this.getFeatureSettingEnabled("includeIframes", "enabled"),
10633-
excludeSelectors: excludeSelectorsString
10634-
});
10635-
this.log.info("Content markdown", content, contentRoot);
10662+
excludeSelectors: excludeSelectorsString,
10663+
trimBlankLinks: this.getFeatureSettingEnabled("trimBlankLinks", "enabled")
10664+
}).trim();
10665+
this.log.info("Content markdown", result, root);
10666+
return result;
10667+
};
10668+
if (contentRoot) {
10669+
content += extractContent(contentRoot);
10670+
}
10671+
if (content.length === 0 && contentRoot !== document.body && this.getFeatureSettingEnabled("bodyFallback", "enabled")) {
10672+
contentRoot = document.body;
10673+
content += extractContent(contentRoot);
1063610674
}
10637-
content = content.trim();
1063810675
this.fullContentLength = content.length;
1063910676
if (content.length > maxLength) {
1064010677
this.log.info("Truncating content", {

build/apple/contentScopeIsolated.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8613,7 +8613,7 @@ ul.messages {
86138613
break;
86148614
case "UNKNOWN":
86158615
default:
8616-
console.warn("No known pageType");
8616+
logger.log("No known pageType");
86178617
}
86188618
if (this.currentPage) {
86198619
this.currentPage.destroy();

build/integration/contentScope.js

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14524,7 +14524,7 @@ ul.messages {
1452414524
break;
1452514525
case "UNKNOWN":
1452614526
default:
14527-
console.warn("No known pageType");
14527+
logger.log("No known pageType");
1452814528
}
1452914529
if (this.currentPage) {
1453014530
this.currentPage.destroy();
@@ -21829,6 +21829,20 @@ ${truncatedWarning}
2182921829
return node.nodeType === Node.ELEMENT_NODE;
2183021830
}
2183121831
function getSameOriginIframeDocument(iframe) {
21832+
const src = iframe.src;
21833+
if (iframe.hasAttribute("sandbox") && !iframe.sandbox.contains("allow-scripts")) {
21834+
return null;
21835+
}
21836+
if (src && src !== "about:blank" && src !== "") {
21837+
try {
21838+
const iframeUrl = new URL(src, window.location.href);
21839+
if (iframeUrl.origin !== window.location.origin) {
21840+
return null;
21841+
}
21842+
} catch (e) {
21843+
return null;
21844+
}
21845+
}
2183221846
try {
2183321847
const doc = iframe.contentDocument;
2183421848
if (doc && doc.documentElement) {
@@ -21864,7 +21878,7 @@ ${truncatedWarning}
2186421878
if (!isHtmlElement(node)) {
2186521879
return "";
2186621880
}
21867-
if (!checkNodeIsVisible(node) || node.matches(settings.excludeSelectors)) {
21881+
if (!checkNodeIsVisible(node) || settings.excludeSelectors && node.matches(settings.excludeSelectors)) {
2186821882
return "";
2186921883
}
2187021884
const tag = node.tagName.toLowerCase();
@@ -21896,17 +21910,22 @@ ${truncatedWarning}
2189621910
`;
2189721911
case "br":
2189821912
return `
21913+
`;
21914+
case "img":
21915+
return `
21916+
![${getAttributeOrBlank(node, "alt")}](${getAttributeOrBlank(node, "src")})
2189921917
`;
2190021918
case "ul":
21919+
case "ol":
2190121920
return `
2190221921
${children}
2190321922
`;
2190421923
case "li":
2190521924
return `
21906-
- ${children.trim()}
21925+
- ${collapseAndTrim(children)}
2190721926
`;
2190821927
case "a":
21909-
return getLinkText(node);
21928+
return getLinkText(node, children, settings);
2191021929
case "iframe": {
2191121930
if (!settings.includeIframes) {
2191221931
return children;
@@ -21931,12 +21950,20 @@ ${iframeContent}
2193121950
return children;
2193221951
}
2193321952
}
21953+
function getAttributeOrBlank(node, attr) {
21954+
const attrValue = node.getAttribute(attr) ?? "";
21955+
return attrValue.trim();
21956+
}
2193421957
function collapseAndTrim(str) {
2193521958
return collapseWhitespace(str).trim();
2193621959
}
21937-
function getLinkText(node) {
21960+
function getLinkText(node, children, settings) {
2193821961
const href = node.getAttribute("href");
21939-
return href ? `[${collapseAndTrim(node.textContent)}](${href})` : collapseWhitespace(node.textContent);
21962+
const trimmedContent = collapseAndTrim(children);
21963+
if (settings.trimBlankLinks && trimmedContent.length === 0) {
21964+
return "";
21965+
}
21966+
return href ? `[${trimmedContent}](${href})` : collapseWhitespace(children);
2194021967
}
2194121968
var _cachedContent, _cachedTimestamp, _delayedRecheckTimer;
2194221969
var PageContext = class extends ContentFeature {
@@ -22167,6 +22194,8 @@ ${iframeContent}
2216722194
const maxDepth = this.getFeatureSetting("maxDepth") || 5e3;
2216822195
let excludeSelectors = this.getFeatureSetting("excludeSelectors") || [".ad", ".sidebar", ".footer", ".nav", ".header"];
2216922196
const excludedInertElements = this.getFeatureSetting("excludedInertElements") || [
22197+
"img",
22198+
// Note: images are excluded by default for now, even though domToMarkdown can render them (this can be enabled per site in the config if needed).
2217022199
"script",
2217122200
"style",
2217222201
"link",
@@ -22184,18 +22213,26 @@ ${iframeContent}
2218422213
if (mainContent && mainContent.innerHTML.trim().length <= mainContentLength) {
2218522214
mainContent = null;
2218622215
}
22187-
const contentRoot = mainContent || document.body;
22188-
if (contentRoot) {
22189-
this.log.info("Getting main content", contentRoot);
22190-
content += domToMarkdown(contentRoot, {
22216+
let contentRoot = mainContent || document.body;
22217+
const extractContent = (root) => {
22218+
this.log.info("Getting content", root);
22219+
const result = domToMarkdown(root, {
2219122220
maxLength: upperLimit,
2219222221
maxDepth,
2219322222
includeIframes: this.getFeatureSettingEnabled("includeIframes", "enabled"),
22194-
excludeSelectors: excludeSelectorsString
22195-
});
22196-
this.log.info("Content markdown", content, contentRoot);
22223+
excludeSelectors: excludeSelectorsString,
22224+
trimBlankLinks: this.getFeatureSettingEnabled("trimBlankLinks", "enabled")
22225+
}).trim();
22226+
this.log.info("Content markdown", result, root);
22227+
return result;
22228+
};
22229+
if (contentRoot) {
22230+
content += extractContent(contentRoot);
22231+
}
22232+
if (content.length === 0 && contentRoot !== document.body && this.getFeatureSettingEnabled("bodyFallback", "enabled")) {
22233+
contentRoot = document.body;
22234+
content += extractContent(contentRoot);
2219722235
}
22198-
content = content.trim();
2219922236
this.fullContentLength = content.length;
2220022237
if (content.length > maxLength) {
2220122238
this.log.info("Truncating content", {

0 commit comments

Comments
 (0)