Sitemap should only exclude 404 and 500 pages (#7655)

* fix(#7472): sitemap should only exclude 404 and 500 pages

* chore: refactor logic, add test
This commit is contained in:
Nate Moore 2023-07-14 14:30:33 -05:00 committed by GitHub
parent 795d598aeb
commit c258492b72
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 58 additions and 27 deletions

View file

@ -0,0 +1,5 @@
---
'@astrojs/sitemap': minor
---
Ensure sitemap only excludes numerical pages matching `/404` and `/500` exactly

View file

@ -2,13 +2,11 @@ import type { EnumChangefreq } from 'sitemap';
import type { SitemapItem, SitemapOptions } from './index.js';
import { parseUrl } from './utils/parse-url.js';
const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;
/** Construct sitemap.xml given a set of URLs */
export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts!;
// TODO: find way to respect <link rel="canonical"> URLs here
const urls = [...pages].filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
const urls = [...pages];
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time
const lastmod = lastmodSrc?.toISOString();

View file

@ -49,6 +49,7 @@ function formatConfigErrorMessage(err: ZodError) {
const PKG_NAME = '@astrojs/sitemap';
const OUTFILE = 'sitemap-index.xml';
const STATUS_CODE_PAGES = new Set(['/404', '/500']);
const createPlugin = (options?: SitemapOptions): AstroIntegration => {
let config: AstroConfig;
@ -85,7 +86,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
return;
}
let pageUrls = pages.map((p) => {
let pageUrls = pages.filter((p) => !STATUS_CODE_PAGES.has('/' + p.pathname.slice(0, -1))).map((p) => {
if (p.pathname !== '' && !finalSiteUrl.pathname.endsWith('/'))
finalSiteUrl.pathname += '/';
const path = finalSiteUrl.pathname + p.pathname;
@ -97,6 +98,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
* Dynamic URLs have entries with `undefined` pathnames
*/
if (r.pathname) {
if (STATUS_CODE_PAGES.has(r.pathname)) return urls;
/**
* remove the initial slash from relative pathname
* because `finalSiteUrl` always has trailing slash

View file

@ -12,7 +12,7 @@ describe('Filter support', () => {
root: './fixtures/static/',
integrations: [
sitemap({
filter: (page) => page !== 'http://example.com/two/',
filter: (page) => page === 'http://example.com/one/',
}),
],
});
@ -32,7 +32,7 @@ describe('Filter support', () => {
root: './fixtures/ssr/',
integrations: [
sitemap({
filter: (page) => page !== 'http://example.com/two/',
filter: (page) => page === 'http://example.com/one/',
}),
],
});

View file

@ -0,0 +1,8 @@
<html>
<head>
<title>123</title>
</head>
<body>
<h1>123</h1>
</body>
</html>

View file

@ -0,0 +1,8 @@
<html>
<head>
<title>404</title>
</head>
<body>
<h1>404</h1>
</body>
</html>

View file

@ -4,19 +4,29 @@ import { expect } from 'chai';
describe('getStaticPaths support', () => {
/** @type {import('./test-utils.js').Fixture} */
let fixture;
/** @type {string[]} */
let urls;
before(async () => {
fixture = await loadFixture({
root: './fixtures/static/',
});
await fixture.build();
});
it('getStaticPath pages require zero config', async () => {
const data = await readXML(fixture.readFile('/sitemap-0.xml'));
const urls = data.urlset.url;
urls = data.urlset.url.map(url => url.loc[0]);
});
expect(urls[0].loc[0]).to.equal('http://example.com/one/');
expect(urls[1].loc[0]).to.equal('http://example.com/two/');
it('requires zero config for getStaticPaths', async () => {
expect(urls).to.include('http://example.com/one/');
expect(urls).to.include('http://example.com/two/');
});
it('does not include 404 pages', () => {
expect(urls).to.not.include('http://example.com/404/');
});
it('includes numerical pages', () => {
expect(urls).to.include('http://example.com/123/');
})
});