Sitemap should only exclude 404 and 500 pages (#7655)
* fix(#7472): sitemap should only exclude 404 and 500 pages * chore: refactor logic, add test
This commit is contained in:
parent
795d598aeb
commit
c258492b72
7 changed files with 58 additions and 27 deletions
5
.changeset/chatty-pigs-film.md
Normal file
5
.changeset/chatty-pigs-film.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
'@astrojs/sitemap': minor
|
||||
---
|
||||
|
||||
Ensure sitemap only excludes numerical pages matching `/404` and `/500` exactly
|
|
@ -2,13 +2,11 @@ import type { EnumChangefreq } from 'sitemap';
|
|||
import type { SitemapItem, SitemapOptions } from './index.js';
|
||||
import { parseUrl } from './utils/parse-url.js';
|
||||
|
||||
const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;
|
||||
|
||||
/** Construct sitemap.xml given a set of URLs */
|
||||
export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
|
||||
const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts!;
|
||||
// TODO: find way to respect <link rel="canonical"> URLs here
|
||||
const urls = [...pages].filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
|
||||
const urls = [...pages];
|
||||
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time
|
||||
|
||||
const lastmod = lastmodSrc?.toISOString();
|
||||
|
|
|
@ -22,24 +22,24 @@ export type LinkItem = LinkItemBase;
|
|||
|
||||
export type SitemapOptions =
|
||||
| {
|
||||
filter?(page: string): boolean;
|
||||
customPages?: string[];
|
||||
filter?(page: string): boolean;
|
||||
customPages?: string[];
|
||||
|
||||
i18n?: {
|
||||
defaultLocale: string;
|
||||
locales: Record<string, string>;
|
||||
};
|
||||
// number of entries per sitemap file
|
||||
entryLimit?: number;
|
||||
i18n?: {
|
||||
defaultLocale: string;
|
||||
locales: Record<string, string>;
|
||||
};
|
||||
// number of entries per sitemap file
|
||||
entryLimit?: number;
|
||||
|
||||
// sitemap specific
|
||||
changefreq?: ChangeFreq;
|
||||
lastmod?: Date;
|
||||
priority?: number;
|
||||
// sitemap specific
|
||||
changefreq?: ChangeFreq;
|
||||
lastmod?: Date;
|
||||
priority?: number;
|
||||
|
||||
// called for each sitemap item just before to save them on disk, sync or async
|
||||
serialize?(item: SitemapItem): SitemapItem | Promise<SitemapItem | undefined> | undefined;
|
||||
}
|
||||
// called for each sitemap item just before to save them on disk, sync or async
|
||||
serialize?(item: SitemapItem): SitemapItem | Promise<SitemapItem | undefined> | undefined;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
function formatConfigErrorMessage(err: ZodError) {
|
||||
|
@ -49,6 +49,7 @@ function formatConfigErrorMessage(err: ZodError) {
|
|||
|
||||
const PKG_NAME = '@astrojs/sitemap';
|
||||
const OUTFILE = 'sitemap-index.xml';
|
||||
const STATUS_CODE_PAGES = new Set(['/404', '/500']);
|
||||
|
||||
const createPlugin = (options?: SitemapOptions): AstroIntegration => {
|
||||
let config: AstroConfig;
|
||||
|
@ -85,7 +86,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
|
|||
return;
|
||||
}
|
||||
|
||||
let pageUrls = pages.map((p) => {
|
||||
let pageUrls = pages.filter((p) => !STATUS_CODE_PAGES.has('/' + p.pathname.slice(0, -1))).map((p) => {
|
||||
if (p.pathname !== '' && !finalSiteUrl.pathname.endsWith('/'))
|
||||
finalSiteUrl.pathname += '/';
|
||||
const path = finalSiteUrl.pathname + p.pathname;
|
||||
|
@ -97,6 +98,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
|
|||
* Dynamic URLs have entries with `undefined` pathnames
|
||||
*/
|
||||
if (r.pathname) {
|
||||
if (STATUS_CODE_PAGES.has(r.pathname)) return urls;
|
||||
/**
|
||||
* remove the initial slash from relative pathname
|
||||
* because `finalSiteUrl` always has trailing slash
|
||||
|
|
|
@ -12,7 +12,7 @@ describe('Filter support', () => {
|
|||
root: './fixtures/static/',
|
||||
integrations: [
|
||||
sitemap({
|
||||
filter: (page) => page !== 'http://example.com/two/',
|
||||
filter: (page) => page === 'http://example.com/one/',
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
@ -32,7 +32,7 @@ describe('Filter support', () => {
|
|||
root: './fixtures/ssr/',
|
||||
integrations: [
|
||||
sitemap({
|
||||
filter: (page) => page !== 'http://example.com/two/',
|
||||
filter: (page) => page === 'http://example.com/one/',
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
|
8
packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro
vendored
Normal file
8
packages/integrations/sitemap/test/fixtures/static/src/pages/123.astro
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>123</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>123</h1>
|
||||
</body>
|
||||
</html>
|
8
packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro
vendored
Normal file
8
packages/integrations/sitemap/test/fixtures/static/src/pages/404.astro
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>404</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>404</h1>
|
||||
</body>
|
||||
</html>
|
|
@ -4,19 +4,29 @@ import { expect } from 'chai';
|
|||
describe('getStaticPaths support', () => {
|
||||
/** @type {import('./test-utils.js').Fixture} */
|
||||
let fixture;
|
||||
/** @type {string[]} */
|
||||
let urls;
|
||||
|
||||
before(async () => {
|
||||
fixture = await loadFixture({
|
||||
root: './fixtures/static/',
|
||||
});
|
||||
await fixture.build();
|
||||
});
|
||||
|
||||
it('getStaticPath pages require zero config', async () => {
|
||||
const data = await readXML(fixture.readFile('/sitemap-0.xml'));
|
||||
const urls = data.urlset.url;
|
||||
|
||||
expect(urls[0].loc[0]).to.equal('http://example.com/one/');
|
||||
expect(urls[1].loc[0]).to.equal('http://example.com/two/');
|
||||
urls = data.urlset.url.map(url => url.loc[0]);
|
||||
});
|
||||
|
||||
it('requires zero config for getStaticPaths', async () => {
|
||||
expect(urls).to.include('http://example.com/one/');
|
||||
expect(urls).to.include('http://example.com/two/');
|
||||
});
|
||||
|
||||
it('does not include 404 pages', () => {
|
||||
expect(urls).to.not.include('http://example.com/404/');
|
||||
});
|
||||
|
||||
it('includes numerical pages', () => {
|
||||
expect(urls).to.include('http://example.com/123/');
|
||||
})
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue