Add sitemap filter option (#2755)
* Add filter for sitemap * chore: add changeset * fix: types * docs: add sitemapFilter docs Co-authored-by: David Rothmann <david@rothmann.it>
This commit is contained in:
parent
602bf2f181
commit
10843aba63
5 changed files with 48 additions and 4 deletions
18
.changeset/cool-ravens-occur.md
Normal file
18
.changeset/cool-ravens-occur.md
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
---
|
||||||
|
'astro': patch
|
||||||
|
---
|
||||||
|
|
||||||
|
Add user-configurable `sitemapFilter` option.
|
||||||
|
|
||||||
|
This option can be used to ensure certain pages are excluded from your final sitemap.
|
||||||
|
|
||||||
|
```ts
|
||||||
|
// astro.config.ts
|
||||||
|
import type { AstroUserConfig } from 'astro'
|
||||||
|
|
||||||
|
const config: AstroUserConfig = {
|
||||||
|
buildOptions: {
sitemap: true,
|
||||||
|
sitemapFilter: (page: string) => !page.includes('secret-page'),
},
|
||||||
|
}
|
||||||
|
export default config
|
||||||
|
```
|
|
@ -226,6 +226,25 @@ export interface AstroUserConfig {
|
||||||
*/
|
*/
|
||||||
sitemap?: boolean;
|
sitemap?: boolean;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @docs
|
||||||
|
* @name buildOptions.sitemapFilter
|
||||||
|
* @type {undefined|((page: string) => boolean)}
|
||||||
|
* @default `undefined`
|
||||||
|
* @description
|
||||||
|
* Customize sitemap generation for your build by excluding certain pages.
|
||||||
|
*
|
||||||
|
* ```js
|
||||||
|
* {
|
||||||
|
* buildOptions: {
|
||||||
|
* sitemap: true,
|
||||||
|
* sitemapFilter: (page) => !page.includes('secret-page')
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
sitemapFilter?: (page: string) => boolean
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @docs
|
* @docs
|
||||||
* @name buildOptions.pageUrlFormat
|
* @name buildOptions.pageUrlFormat
|
||||||
|
|
|
@ -149,7 +149,8 @@ class AstroBuilder {
|
||||||
// Build your final sitemap.
|
// Build your final sitemap.
|
||||||
if (this.config.buildOptions.sitemap && this.config.buildOptions.site) {
|
if (this.config.buildOptions.sitemap && this.config.buildOptions.site) {
|
||||||
timer.sitemapStart = performance.now();
|
timer.sitemapStart = performance.now();
|
||||||
const sitemap = generateSitemap(pageNames.map((pageName) => new URL(pageName, this.config.buildOptions.site).href));
|
const sitemapFilter = this.config.buildOptions.sitemapFilter ? (this.config.buildOptions.sitemapFilter as (page: string) => boolean) : undefined;
|
||||||
|
const sitemap = generateSitemap(pageNames.map((pageName) => new URL(pageName, this.config.buildOptions.site).href), sitemapFilter);
|
||||||
const sitemapPath = new URL('./sitemap.xml', this.config.dist);
|
const sitemapPath = new URL('./sitemap.xml', this.config.dist);
|
||||||
await fs.promises.mkdir(new URL('./', sitemapPath), { recursive: true });
|
await fs.promises.mkdir(new URL('./', sitemapPath), { recursive: true });
|
||||||
await fs.promises.writeFile(sitemapPath, sitemap, 'utf8');
|
await fs.promises.writeFile(sitemapPath, sitemap, 'utf8');
|
||||||
|
|
|
@ -50,6 +50,7 @@ export const AstroConfigSchema = z.object({
|
||||||
.string()
|
.string()
|
||||||
.optional()
|
.optional()
|
||||||
.transform((val) => (val ? addTrailingSlash(val) : val)),
|
.transform((val) => (val ? addTrailingSlash(val) : val)),
|
||||||
|
sitemapFilter: z.function().optional(),
|
||||||
sitemap: z.boolean().optional().default(true),
|
sitemap: z.boolean().optional().default(true),
|
||||||
pageUrlFormat: z
|
pageUrlFormat: z
|
||||||
.union([z.literal('file'), z.literal('directory')])
|
.union([z.literal('file'), z.literal('directory')])
|
||||||
|
|
|
@ -1,14 +1,19 @@
|
||||||
|
|
||||||
const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;
|
const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;
|
||||||
|
|
||||||
/** Construct sitemap.xml given a set of URLs */
|
/** Construct sitemap.xml given a set of URLs */
|
||||||
export function generateSitemap(pages: string[]): string {
|
export function generateSitemap(pages: string[], filter?: (page: string) => boolean): string {
|
||||||
// TODO: find way to respect <link rel="canonical"> URLs here
|
// TODO: find way to respect <link rel="canonical"> URLs here
|
||||||
// TODO: find way to exclude pages from sitemap
|
|
||||||
|
|
||||||
// copy just in case original copy is needed
|
// copy just in case original copy is needed
|
||||||
// make sure that 404 page is excluded
|
// make sure that 404 page is excluded
|
||||||
// also works for other error pages
|
// also works for other error pages
|
||||||
const urls = [...pages].filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
|
let urls = [...pages].filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
|
||||||
|
|
||||||
|
if (filter) {
|
||||||
|
urls = urls.filter(url => filter(url));
|
||||||
|
}
|
||||||
|
|
||||||
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time
|
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time
|
||||||
let sitemap = `<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
|
let sitemap = `<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
|
||||||
for (const url of urls) {
|
for (const url of urls) {
|
||||||
|
|
Loading…
Reference in a new issue