Add sitemap generation (#120)

* Add sitemap generation

* Update README

* Fix index handling, sort sitemap

* Add --no-sitemap CLI flag (and config option)
This commit is contained in:
Drew Powers 2021-04-21 14:14:24 -06:00 committed by GitHub
parent 93e207e38b
commit a7185735da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 128 additions and 30 deletions

View file

@ -52,6 +52,8 @@ export default {
}, },
/** Your public domain, e.g.: https://my-site.dev/ */ /** Your public domain, e.g.: https://my-site.dev/ */
site: '', site: '',
/** Generate sitemap (set to "false" to disable) */
sitemap: true,
}; };
``` ```
@ -154,6 +156,16 @@ const localData = Astro.fetchContent('../post/*.md');
--- ---
``` ```
### 🗺️ Sitemap
Astro will automatically create a `/sitemap.xml` for you for SEO! Be sure to set the `site` URL in your [Astro config][config] so the URLs can be generated properly.
⚠️ Note that Astro won’t inject this into your HTML for you! You’ll have to add the tag yourself in your `<head>` on all pages that need it:
```html
<link rel="sitemap" href="/sitemap.xml" />
```
##### Examples ##### Examples
- [Blog Example][example-blog] - [Blog Example][example-blog]

View file

@ -0,0 +1,3 @@
// Project config (presumably this example's astro.config.mjs — confirm the filename).
export default {
// Public URL base of the deployed site; the build resolves page URLs against it when writing sitemap.xml.
site: 'https://muppet-blog.github.io/',
};

View file

@ -189,7 +189,7 @@
"rollup": "^2.43.1", "rollup": "^2.43.1",
"rollup-plugin-terser": "^7.0.2", "rollup-plugin-terser": "^7.0.2",
"sass": "^1.32.8", "sass": "^1.32.8",
"snowpack": "^3.3.2", "snowpack": "^3.3.4",
"svelte": "^3.35.0", "svelte": "^3.35.0",
"tiny-glob": "^0.2.8", "tiny-glob": "^0.2.8",
"unified": "^9.2.1", "unified": "^9.2.1",

View file

@ -14,6 +14,10 @@ export interface AstroConfig {
astroRoot: URL; astroRoot: URL;
public: URL; public: URL;
extensions?: Record<string, ValidExtensionPlugins>; extensions?: Record<string, ValidExtensionPlugins>;
/** Public URL base (e.g. 'https://mysite.com'). Used in generating sitemaps and canonical URLs. */
site?: string;
/** Generate a sitemap? */
sitemap: boolean;
} }
export interface JsxItem { export interface JsxItem {

View file

@ -3,25 +3,29 @@ import type { LogOptions } from './logger';
import type { AstroRuntime, LoadResult } from './runtime'; import type { AstroRuntime, LoadResult } from './runtime';
import { existsSync, promises as fsPromises } from 'fs'; import { existsSync, promises as fsPromises } from 'fs';
import { relative as pathRelative } from 'path'; import path from 'path';
import cheerio from 'cheerio';
import { fileURLToPath } from 'url'; import { fileURLToPath } from 'url';
import { fdir } from 'fdir'; import { fdir } from 'fdir';
import { defaultLogDestination, error } from './logger.js'; import { defaultLogDestination, error, info } from './logger.js';
import { createRuntime } from './runtime.js'; import { createRuntime } from './runtime.js';
import { bundle, collectDynamicImports } from './build/bundle.js'; import { bundle, collectDynamicImports } from './build/bundle.js';
import { generateSitemap } from './build/sitemap.js';
import { collectStatics } from './build/static.js'; import { collectStatics } from './build/static.js';
const { mkdir, readdir, readFile, stat, writeFile } = fsPromises; const { mkdir, readFile, writeFile } = fsPromises;
interface PageBuildOptions { interface PageBuildOptions {
astroRoot: URL; astroRoot: URL;
dist: URL; dist: URL;
filepath: URL; filepath: URL;
runtime: AstroRuntime; runtime: AstroRuntime;
sitemap: boolean;
statics: Set<string>; statics: Set<string>;
} }
interface PageResult { interface PageResult {
canonicalURLs: string[];
statusCode: number; statusCode: number;
} }
@ -49,14 +53,14 @@ function mergeSet(a: Set<string>, b: Set<string>) {
} }
/** Utility for writing to file (async) */ /** Utility for writing to file (async) */
async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf-8' | null) { async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf8' | null) {
const outFolder = new URL('./', outPath); const outFolder = new URL('./', outPath);
await mkdir(outFolder, { recursive: true }); await mkdir(outFolder, { recursive: true });
await writeFile(outPath, bytes, encoding || 'binary'); await writeFile(outPath, bytes, encoding || 'binary');
} }
/** Utility for writing a build result to disk */ /** Utility for writing a build result to disk */
async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf-8') { async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf8') {
if (result.statusCode === 500 || result.statusCode === 404) { if (result.statusCode === 500 || result.statusCode === 404) {
error(logging, 'build', result.error || result.statusCode); error(logging, 'build', result.error || result.statusCode);
} else if (result.statusCode !== 200) { } else if (result.statusCode !== 200) {
@ -75,7 +79,7 @@ function getPageType(filepath: URL): 'collection' | 'static' {
/** Build collection */ /** Build collection */
async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise<PageResult> { async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise<PageResult> {
const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
const pagePath = `/${rel.replace(/\$([^.]+)\.astro$/, '$1')}`; const pagePath = `/${rel.replace(/\$([^.]+)\.astro$/, '$1')}`;
const builtURLs = new Set<string>(); // !important: internal cache that prevents building the same URLs const builtURLs = new Set<string>(); // !important: internal cache that prevents building the same URLs
@ -86,8 +90,8 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
builtURLs.add(url); builtURLs.add(url);
if (result.statusCode === 200) { if (result.statusCode === 200) {
const outPath = new URL('./' + url + '/index.html', dist); const outPath = new URL('./' + url + '/index.html', dist);
await writeResult(result, outPath, 'utf-8'); await writeResult(result, outPath, 'utf8');
mergeSet(statics, collectStatics(result.contents.toString('utf-8'))); mergeSet(statics, collectStatics(result.contents.toString('utf8')));
} }
return result; return result;
} }
@ -103,6 +107,7 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
[...result.collectionInfo.additionalURLs].map(async (url) => { [...result.collectionInfo.additionalURLs].map(async (url) => {
// for the top set of additional URLs, we render every new URL generated // for the top set of additional URLs, we render every new URL generated
const addlResult = await loadCollection(url); const addlResult = await loadCollection(url);
builtURLs.add(url);
if (addlResult && addlResult.collectionInfo) { if (addlResult && addlResult.collectionInfo) {
// believe it or not, we may still have a few unbuilt pages left. this is our last crawl: // believe it or not, we may still have a few unbuilt pages left. this is our last crawl:
await Promise.all([...addlResult.collectionInfo.additionalURLs].map(async (url2) => loadCollection(url2))); await Promise.all([...addlResult.collectionInfo.additionalURLs].map(async (url2) => loadCollection(url2)));
@ -112,14 +117,16 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
} }
return { return {
canonicalURLs: [...builtURLs].filter((url) => !url.endsWith('/1')), // note: canonical URLs are controlled by the collection, so these are canonical (but exclude "/1" pages as those are duplicates of the index)
statusCode: result.statusCode, statusCode: result.statusCode,
}; };
} }
/** Build static page */ /** Build static page */
async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise<PageResult> { async function buildStaticPage({ astroRoot, dist, filepath, runtime, sitemap, statics }: PageBuildOptions): Promise<PageResult> {
const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
const pagePath = `/${rel.replace(/\.(astro|md)$/, '')}`; const pagePath = `/${rel.replace(/\.(astro|md)$/, '')}`;
let canonicalURLs: string[] = [];
let relPath = './' + rel.replace(/\.(astro|md)$/, '.html'); let relPath = './' + rel.replace(/\.(astro|md)$/, '.html');
if (!relPath.endsWith('index.html')) { if (!relPath.endsWith('index.html')) {
@ -129,12 +136,21 @@ async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }:
const outPath = new URL(relPath, dist); const outPath = new URL(relPath, dist);
const result = await runtime.load(pagePath); const result = await runtime.load(pagePath);
await writeResult(result, outPath, 'utf-8'); await writeResult(result, outPath, 'utf8');
if (result.statusCode === 200) { if (result.statusCode === 200) {
mergeSet(statics, collectStatics(result.contents.toString('utf-8'))); mergeSet(statics, collectStatics(result.contents.toString('utf8')));
// get Canonical URL (if user has specified one manually, use that)
if (sitemap) {
const $ = cheerio.load(result.contents);
const canonicalTag = $('link[rel="canonical"]');
canonicalURLs.push(canonicalTag.attr('href') || pagePath.replace(/index$/, ''));
}
} }
return { return {
canonicalURLs,
statusCode: result.statusCode, statusCode: result.statusCode,
}; };
} }
@ -162,6 +178,7 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const collectImportsOptions = { astroConfig, logging, resolvePackageUrl, mode }; const collectImportsOptions = { astroConfig, logging, resolvePackageUrl, mode };
const pages = await allPages(pageRoot); const pages = await allPages(pageRoot);
let builtURLs: string[] = [];
try { try {
await Promise.all( await Promise.all(
@ -169,11 +186,13 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const filepath = new URL(`file://${pathname}`); const filepath = new URL(`file://${pathname}`);
const pageType = getPageType(filepath); const pageType = getPageType(filepath);
const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, statics }; const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, sitemap: astroConfig.sitemap, statics };
if (pageType === 'collection') { if (pageType === 'collection') {
await buildCollectionPage(pageOptions); const { canonicalURLs } = await buildCollectionPage(pageOptions);
builtURLs.push(...canonicalURLs);
} else { } else {
await buildStaticPage(pageOptions); const { canonicalURLs } = await buildStaticPage(pageOptions);
builtURLs.push(...canonicalURLs);
} }
mergeSet(imports, await collectDynamicImports(filepath, collectImportsOptions)); mergeSet(imports, await collectDynamicImports(filepath, collectImportsOptions));
@ -211,7 +230,7 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const publicFiles = (await new fdir().withFullPaths().crawl(fileURLToPath(pub)).withPromise()) as string[]; const publicFiles = (await new fdir().withFullPaths().crawl(fileURLToPath(pub)).withPromise()) as string[];
for (const filepath of publicFiles) { for (const filepath of publicFiles) {
const fileUrl = new URL(`file://${filepath}`); const fileUrl = new URL(`file://${filepath}`);
const rel = pathRelative(pub.pathname, fileUrl.pathname); const rel = path.relative(pub.pathname, fileUrl.pathname);
const outUrl = new URL('./' + rel, dist); const outUrl = new URL('./' + rel, dist);
const bytes = await readFile(fileUrl); const bytes = await readFile(fileUrl);
@ -219,6 +238,21 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
} }
} }
// build sitemap
if (astroConfig.sitemap && astroConfig.site) {
const sitemap = generateSitemap(
builtURLs.map((url) => ({
canonicalURL: new URL(
path.extname(url) ? url : url.replace(/\/?$/, '/'), // add trailing slash if theres no extension
astroConfig.site
).href,
}))
);
await writeFile(new URL('./sitemap.xml', dist), sitemap, 'utf8');
} else if (astroConfig.sitemap) {
info(logging, 'tip', `Set your "site" in astro.config.mjs to generate a sitemap.xml`);
}
await runtime.shutdown(); await runtime.shutdown();
return 0; return 0;
} }

15
src/build/sitemap.ts Normal file
View file

@ -0,0 +1,15 @@
export interface PageMeta {
  /** (required) The canonical URL of the page */
  canonicalURL: string;
}

/** Characters that must be entity-escaped inside sitemap XML values. */
const XML_ENTITIES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&apos;',
};

/** Entity-escape a string so it can be embedded safely as XML text content. */
function escapeXml(value: string): string {
  return value.replace(/[&<>"']/g, (char) => XML_ENTITIES[char] ?? char);
}

/**
 * Construct sitemap.xml given a set of URLs.
 *
 * @param pages Pages to list; each `canonicalURL` should already be an absolute URL.
 *   The array is not modified.
 * @returns The full sitemap document as a single-line XML string ending with a newline.
 *   Entries are ordered by URL using an English, numeric-aware comparison
 *   (so `/posts/2` comes before `/posts/10`).
 */
export function generateSitemap(pages: PageMeta[]): string {
  // Sort a copy: callers should not see their array reordered as a side effect.
  const sorted = [...pages].sort((a, b) => a.canonicalURL.localeCompare(b.canonicalURL, 'en', { numeric: true }));

  // URLs commonly contain "&" in query strings; unescaped it would make the XML invalid.
  const entries = sorted.map((page) => `<url><loc>${escapeXml(page.canonicalURL)}</loc></url>`).join('');

  return `<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">${entries}</urlset>\n`;
}

View file

@ -16,24 +16,34 @@ const buildAndExit = async (...args: Parameters<typeof build>) => {
}; };
type Arguments = yargs.Arguments; type Arguments = yargs.Arguments;
type cliState = 'help' | 'version' | 'dev' | 'build'; type cliCommand = 'help' | 'version' | 'dev' | 'build';
/** Result of parsing the CLI arguments: which command to run plus the flags that accompany it. */
interface CLIState {
/** The resolved command. */
cmd: cliCommand;
/** Flags gathered from the command line. */
options: {
/** Sitemap override; left undefined unless the flag was parsed as a boolean (e.g. `--no-sitemap`). */
sitemap?: boolean;
};
}
/** Determine which action the user requested */ /** Determine which action the user requested */
function resolveArgs(flags: Arguments): cliState { function resolveArgs(flags: Arguments): CLIState {
const options: CLIState['options'] = {
sitemap: typeof flags.sitemap === 'boolean' ? flags.sitemap : undefined,
};
if (flags.version) { if (flags.version) {
return 'version'; return { cmd: 'version', options };
} else if (flags.help) { } else if (flags.help) {
return 'help'; return { cmd: 'help', options };
} }
const cmd = flags._[2]; const cmd = flags._[2];
switch (cmd) { switch (cmd) {
case 'dev': case 'dev':
return 'dev'; return { cmd: 'dev', options };
case 'build': case 'build':
return 'build'; return { cmd: 'build', options };
default: default:
return 'help'; return { cmd: 'help', options };
} }
} }
@ -48,6 +58,7 @@ function printHelp() {
${colors.bold('Flags:')} ${colors.bold('Flags:')}
--version Show the version number and exit. --version Show the version number and exit.
--help Show this help message. --help Show this help message.
--no-sitemap Disable sitemap generation (build only).
`); `);
} }
@ -57,10 +68,16 @@ async function printVersion() {
console.error(pkg.version); console.error(pkg.version);
} }
/**
 * Apply CLI flag overrides onto the loaded config, in place.
 * A flag only wins when it was actually provided as a boolean; otherwise the config value is kept.
 */
function mergeCLIFlags(astroConfig: AstroConfig, flags: CLIState['options']) {
  const { sitemap } = flags;
  if (typeof sitemap !== 'boolean') return;
  astroConfig.sitemap = sitemap;
}
/** Handle `astro run` command */ /** Handle `astro run` command */
async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise<void>) { async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise<void>, options: CLIState['options']) {
try { try {
const astroConfig = await loadConfig(rawRoot); const astroConfig = await loadConfig(rawRoot);
mergeCLIFlags(astroConfig, options);
return cmd(astroConfig); return cmd(astroConfig);
} catch (err) { } catch (err) {
console.error(colors.red(err.toString() || err)); console.error(colors.red(err.toString() || err));
@ -78,7 +95,7 @@ export async function cli(args: string[]) {
const flags = yargs(args); const flags = yargs(args);
const state = resolveArgs(flags); const state = resolveArgs(flags);
switch (state) { switch (state.cmd) {
case 'help': { case 'help': {
printHelp(); printHelp();
process.exit(1); process.exit(1);
@ -92,8 +109,8 @@ export async function cli(args: string[]) {
case 'build': case 'build':
case 'dev': { case 'dev': {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const cmd = cmdMap.get(state)!; const cmd = cmdMap.get(state.cmd)!;
runCommand(flags._[3], cmd); runCommand(flags._[3], cmd, state.options);
} }
} }
} }

View file

@ -12,11 +12,18 @@ function validateConfig(config: any): void {
if (typeof config !== 'object') throw new Error(`[astro config] Expected object, received ${typeof config}`); if (typeof config !== 'object') throw new Error(`[astro config] Expected object, received ${typeof config}`);
// strings // strings
for (const key of ['projectRoot', 'astroRoot', 'dist', 'public']) { for (const key of ['projectRoot', 'astroRoot', 'dist', 'public', 'site']) {
if (config[key] && typeof config[key] !== 'string') { if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'string') {
throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected string, received ${type(config[key])}.`); throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected string, received ${type(config[key])}.`);
} }
} }
// booleans
for (const key of ['sitemap']) {
if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'boolean') {
throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected boolean, received ${type(config[key])}.`);
}
}
} }
/** Set default config values */ /** Set default config values */
@ -28,6 +35,8 @@ function configDefaults(userConfig?: any): any {
if (!config.dist) config.dist = './_site'; if (!config.dist) config.dist = './_site';
if (!config.public) config.public = './public'; if (!config.public) config.public = './public';
if (typeof config.sitemap === 'undefined') config.sitemap = true;
return config; return config;
} }

View file

@ -0,0 +1,3 @@
// Config with sitemap generation turned off (presumably a test fixture — confirm).
export default {
// `false` disables sitemap.xml output; the option defaults to true when omitted.
sitemap: false,
};

View file

@ -2,4 +2,5 @@ export default {
extensions: { extensions: {
'.jsx': 'preact', '.jsx': 'preact',
}, },
sitemap: false,
}; };