Add sitemap generation (#120)

* Add sitemap generation

* Update README

* Fix index handling, sort sitemap

* Add --no-sitemap CLI flag (and config option)
This commit is contained in:
Drew Powers 2021-04-21 14:14:24 -06:00 committed by GitHub
parent 93e207e38b
commit a7185735da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 128 additions and 30 deletions

View file

@ -52,6 +52,8 @@ export default {
},
/** Your public domain, e.g.: https://my-site.dev/ */
site: '',
/** Generate sitemap (set to "false" to disable) */
sitemap: true,
};
```
@ -154,6 +156,16 @@ const localData = Astro.fetchContent('../post/*.md');
---
```
### 🗺️ Sitemap
Astro will automatically create a `/sitemap.xml` for you for SEO! Be sure to set the `site` URL in your [Astro config][config] so the URLs can be generated properly.
⚠️ Note that Astro won't inject this into your HTML for you! You'll have to add the tag yourself in your `<head>` on all pages that need it:
```html
<link rel="sitemap" href="/sitemap.xml" />
```
##### Examples
- [Blog Example][example-blog]

View file

@ -0,0 +1,3 @@
export default {
// Public URL of the site; the build uses it as the base when resolving the URLs written to sitemap.xml
site: 'https://muppet-blog.github.io/',
};

View file

@ -189,7 +189,7 @@
"rollup": "^2.43.1",
"rollup-plugin-terser": "^7.0.2",
"sass": "^1.32.8",
"snowpack": "^3.3.2",
"snowpack": "^3.3.4",
"svelte": "^3.35.0",
"tiny-glob": "^0.2.8",
"unified": "^9.2.1",

View file

@ -14,6 +14,10 @@ export interface AstroConfig {
astroRoot: URL;
public: URL;
extensions?: Record<string, ValidExtensionPlugins>;
/** Public URL base (e.g. 'https://mysite.com'). Used in generating sitemaps and canonical URLs. */
site?: string;
/** Generate a sitemap? */
sitemap: boolean;
}
export interface JsxItem {

View file

@ -3,25 +3,29 @@ import type { LogOptions } from './logger';
import type { AstroRuntime, LoadResult } from './runtime';
import { existsSync, promises as fsPromises } from 'fs';
import { relative as pathRelative } from 'path';
import path from 'path';
import cheerio from 'cheerio';
import { fileURLToPath } from 'url';
import { fdir } from 'fdir';
import { defaultLogDestination, error } from './logger.js';
import { defaultLogDestination, error, info } from './logger.js';
import { createRuntime } from './runtime.js';
import { bundle, collectDynamicImports } from './build/bundle.js';
import { generateSitemap } from './build/sitemap.js';
import { collectStatics } from './build/static.js';
const { mkdir, readdir, readFile, stat, writeFile } = fsPromises;
const { mkdir, readFile, writeFile } = fsPromises;
/** Options passed to the per-page build functions (buildCollectionPage / buildStaticPage) */
interface PageBuildOptions {
/** Root of the Astro source; page paths are computed relative to its "/pages" folder */
astroRoot: URL;
/** Base URL that output file paths are resolved against when writing built pages */
dist: URL;
/** Location of the page source file being built */
filepath: URL;
/** Runtime used to load the page and obtain its result */
runtime: AstroRuntime;
/** When true, buildStaticPage records a canonical URL for each successfully built page */
sitemap: boolean;
/** Shared set that the collectStatics() output of each built page is merged into */
statics: Set<string>;
}
/** Outcome of building a single page file */
interface PageResult {
/** URLs produced by this page; build() gathers these and feeds them to the sitemap generator */
canonicalURLs: string[];
/** Status code copied from the runtime's load result for the page */
statusCode: number;
}
@ -49,14 +53,14 @@ function mergeSet(a: Set<string>, b: Set<string>) {
}
/** Utility for writing to file (async) */
async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf-8' | null) {
async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf8' | null) {
const outFolder = new URL('./', outPath);
await mkdir(outFolder, { recursive: true });
await writeFile(outPath, bytes, encoding || 'binary');
}
/** Utility for writing a build result to disk */
async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf-8') {
async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf8') {
if (result.statusCode === 500 || result.statusCode === 404) {
error(logging, 'build', result.error || result.statusCode);
} else if (result.statusCode !== 200) {
@ -75,7 +79,7 @@ function getPageType(filepath: URL): 'collection' | 'static' {
/** Build collection */
async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise<PageResult> {
const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
const pagePath = `/${rel.replace(/\$([^.]+)\.astro$/, '$1')}`;
const builtURLs = new Set<string>(); // !important: internal cache that prevents building the same URLs
@ -86,8 +90,8 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
builtURLs.add(url);
if (result.statusCode === 200) {
const outPath = new URL('./' + url + '/index.html', dist);
await writeResult(result, outPath, 'utf-8');
mergeSet(statics, collectStatics(result.contents.toString('utf-8')));
await writeResult(result, outPath, 'utf8');
mergeSet(statics, collectStatics(result.contents.toString('utf8')));
}
return result;
}
@ -103,6 +107,7 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
[...result.collectionInfo.additionalURLs].map(async (url) => {
// for the top set of additional URLs, we render every new URL generated
const addlResult = await loadCollection(url);
builtURLs.add(url);
if (addlResult && addlResult.collectionInfo) {
// believe it or not, we may still have a few unbuilt pages left. this is our last crawl:
await Promise.all([...addlResult.collectionInfo.additionalURLs].map(async (url2) => loadCollection(url2)));
@ -112,14 +117,16 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
}
return {
canonicalURLs: [...builtURLs].filter((url) => !url.endsWith('/1')), // note: canonical URLs are controlled by the collection, so these are canonical (but exclude "/1" pages as those are duplicates of the index)
statusCode: result.statusCode,
};
}
/** Build static page */
async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise<PageResult> {
const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
async function buildStaticPage({ astroRoot, dist, filepath, runtime, sitemap, statics }: PageBuildOptions): Promise<PageResult> {
const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
const pagePath = `/${rel.replace(/\.(astro|md)$/, '')}`;
let canonicalURLs: string[] = [];
let relPath = './' + rel.replace(/\.(astro|md)$/, '.html');
if (!relPath.endsWith('index.html')) {
@ -129,12 +136,21 @@ async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }:
const outPath = new URL(relPath, dist);
const result = await runtime.load(pagePath);
await writeResult(result, outPath, 'utf-8');
await writeResult(result, outPath, 'utf8');
if (result.statusCode === 200) {
mergeSet(statics, collectStatics(result.contents.toString('utf-8')));
mergeSet(statics, collectStatics(result.contents.toString('utf8')));
// get Canonical URL (if user has specified one manually, use that)
if (sitemap) {
const $ = cheerio.load(result.contents);
const canonicalTag = $('link[rel="canonical"]');
canonicalURLs.push(canonicalTag.attr('href') || pagePath.replace(/index$/, ''));
}
}
return {
canonicalURLs,
statusCode: result.statusCode,
};
}
@ -162,6 +178,7 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const collectImportsOptions = { astroConfig, logging, resolvePackageUrl, mode };
const pages = await allPages(pageRoot);
let builtURLs: string[] = [];
try {
await Promise.all(
@ -169,11 +186,13 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const filepath = new URL(`file://${pathname}`);
const pageType = getPageType(filepath);
const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, statics };
const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, sitemap: astroConfig.sitemap, statics };
if (pageType === 'collection') {
await buildCollectionPage(pageOptions);
const { canonicalURLs } = await buildCollectionPage(pageOptions);
builtURLs.push(...canonicalURLs);
} else {
await buildStaticPage(pageOptions);
const { canonicalURLs } = await buildStaticPage(pageOptions);
builtURLs.push(...canonicalURLs);
}
mergeSet(imports, await collectDynamicImports(filepath, collectImportsOptions));
@ -211,7 +230,7 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const publicFiles = (await new fdir().withFullPaths().crawl(fileURLToPath(pub)).withPromise()) as string[];
for (const filepath of publicFiles) {
const fileUrl = new URL(`file://${filepath}`);
const rel = pathRelative(pub.pathname, fileUrl.pathname);
const rel = path.relative(pub.pathname, fileUrl.pathname);
const outUrl = new URL('./' + rel, dist);
const bytes = await readFile(fileUrl);
@ -219,6 +238,21 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
}
}
// build sitemap
if (astroConfig.sitemap && astroConfig.site) {
const sitemap = generateSitemap(
builtURLs.map((url) => ({
canonicalURL: new URL(
path.extname(url) ? url : url.replace(/\/?$/, '/'), // add trailing slash if theres no extension
astroConfig.site
).href,
}))
);
await writeFile(new URL('./sitemap.xml', dist), sitemap, 'utf8');
} else if (astroConfig.sitemap) {
info(logging, 'tip', `Set your "site" in astro.config.mjs to generate a sitemap.xml`);
}
await runtime.shutdown();
return 0;
}

15
src/build/sitemap.ts Normal file
View file

@ -0,0 +1,15 @@
export interface PageMeta {
  /** (required) The canonical URL of the page */
  canonicalURL: string;
}

/** Entity replacements for the characters that must be escaped inside XML text */
const XML_ENTITIES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&apos;',
};

/** Escape XML special characters so a value can be embedded safely in an element body */
function escapeXML(value: string): string {
  return value.replace(/[&<>"']/g, (char) => XML_ENTITIES[char] ?? char);
}

/**
 * Construct sitemap.xml given a set of URLs.
 *
 * @param pages Pages to list; the array is NOT modified (a sorted copy is used).
 * @returns The complete sitemap document as a single-line XML string ending in a newline.
 *   Entries are ordered by canonical URL using an English, numeric-aware comparison.
 */
export function generateSitemap(pages: PageMeta[]): string {
  // sort a copy so the caller's array keeps its original order
  const sorted = [...pages].sort((a, b) => a.canonicalURL.localeCompare(b.canonicalURL, 'en', { numeric: true }));
  let sitemap = `<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
  for (const page of sorted) {
    // URLs may legally contain "&" (query strings) and "'", which would otherwise produce invalid XML
    sitemap += `<url><loc>${escapeXML(page.canonicalURL)}</loc></url>`;
  }
  sitemap += `</urlset>\n`;
  return sitemap;
}

View file

@ -16,24 +16,34 @@ const buildAndExit = async (...args: Parameters<typeof build>) => {
};
type Arguments = yargs.Arguments;
type cliState = 'help' | 'version' | 'dev' | 'build';
type cliCommand = 'help' | 'version' | 'dev' | 'build';
/** Result of parsing the CLI arguments: the command to run plus flag-derived options */
interface CLIState {
/** Command selected by resolveArgs() */
cmd: cliCommand;
/** Option overrides taken from CLI flags */
options: {
/** Set only when the sitemap flag parsed as a boolean; otherwise left undefined */
sitemap?: boolean;
};
}
/** Determine which action the user requested */
function resolveArgs(flags: Arguments): cliState {
function resolveArgs(flags: Arguments): CLIState {
const options: CLIState['options'] = {
sitemap: typeof flags.sitemap === 'boolean' ? flags.sitemap : undefined,
};
if (flags.version) {
return 'version';
return { cmd: 'version', options };
} else if (flags.help) {
return 'help';
return { cmd: 'help', options };
}
const cmd = flags._[2];
switch (cmd) {
case 'dev':
return 'dev';
return { cmd: 'dev', options };
case 'build':
return 'build';
return { cmd: 'build', options };
default:
return 'help';
return { cmd: 'help', options };
}
}
@ -48,6 +58,7 @@ function printHelp() {
${colors.bold('Flags:')}
--version Show the version number and exit.
--help Show this help message.
--no-sitemap Disable sitemap generation (build only).
`);
}
@ -57,10 +68,16 @@ async function printVersion() {
console.error(pkg.version);
}
/**
 * Apply CLI flag overrides onto the loaded config, in place.
 * A flag only wins when it was actually provided as a boolean; otherwise the config value is kept.
 */
function mergeCLIFlags(astroConfig: AstroConfig, flags: CLIState['options']) {
  const { sitemap } = flags;
  // flag absent (or not a boolean): leave the config untouched
  if (typeof sitemap !== 'boolean') return;
  astroConfig.sitemap = sitemap;
}
/** Handle `astro run` command */
async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise<void>) {
async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise<void>, options: CLIState['options']) {
try {
const astroConfig = await loadConfig(rawRoot);
mergeCLIFlags(astroConfig, options);
return cmd(astroConfig);
} catch (err) {
console.error(colors.red(err.toString() || err));
@ -78,7 +95,7 @@ export async function cli(args: string[]) {
const flags = yargs(args);
const state = resolveArgs(flags);
switch (state) {
switch (state.cmd) {
case 'help': {
printHelp();
process.exit(1);
@ -92,8 +109,8 @@ export async function cli(args: string[]) {
case 'build':
case 'dev': {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const cmd = cmdMap.get(state)!;
runCommand(flags._[3], cmd);
const cmd = cmdMap.get(state.cmd)!;
runCommand(flags._[3], cmd, state.options);
}
}
}

View file

@ -12,11 +12,18 @@ function validateConfig(config: any): void {
if (typeof config !== 'object') throw new Error(`[astro config] Expected object, received ${typeof config}`);
// strings
for (const key of ['projectRoot', 'astroRoot', 'dist', 'public']) {
if (config[key] && typeof config[key] !== 'string') {
for (const key of ['projectRoot', 'astroRoot', 'dist', 'public', 'site']) {
if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'string') {
throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected string, received ${type(config[key])}.`);
}
}
// booleans
for (const key of ['sitemap']) {
if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'boolean') {
throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected boolean, received ${type(config[key])}.`);
}
}
}
/** Set default config values */
@ -28,6 +35,8 @@ function configDefaults(userConfig?: any): any {
if (!config.dist) config.dist = './_site';
if (!config.public) config.public = './public';
if (typeof config.sitemap === 'undefined') config.sitemap = true;
return config;
}

View file

@ -0,0 +1,3 @@
export default {
// Turn off sitemap.xml generation for this project
sitemap: false,
};

View file

@ -2,4 +2,5 @@ export default {
extensions: {
'.jsx': 'preact',
},
sitemap: false,
};