1 | # sitemap ![MIT License](https://img.shields.io/npm/l/sitemap)[![Build Status](https://travis-ci.org/ekalinin/sitemap.js.svg?branch=master)](https://travis-ci.org/ekalinin/sitemap.js)![Monthly Downloads](https://img.shields.io/npm/dm/sitemap)
|
2 |
|
3 | **sitemap** is a high-level streaming sitemap-generating library/CLI that
|
4 | makes creating [sitemap XML](http://www.sitemaps.org/) files easy. [What is a sitemap?](https://support.google.com/webmasters/answer/156184?hl=en&ref_topic=4581190)
|
5 |
|
6 | ## Table of Contents
|
7 |
|
8 | - [Installation](#installation)
|
9 | - [Generate a one time sitemap from a list of urls](#generate-a-one-time-sitemap-from-a-list-of-urls)
|
10 | - [Example of using sitemap.js with](#serve-a-sitemap-from-a-server-and-periodically-update-it) [express](https://expressjs.com/)
|
11 | - [Generating more than one sitemap](#create-sitemap-and-index-files-from-one-large-list)
|
12 | - [Options you can pass](#options-you-can-pass)
|
13 | - [Examples](#examples)
|
14 | - [API](#api)
|
15 | - [Maintainers](#maintainers)
|
16 | - [License](#license)
|
17 |
|
18 | ## Installation
|
19 |
|
20 | ```sh
|
21 | npm install --save sitemap
|
22 | ```
|
23 |
|
24 | ## Generate a one time sitemap from a list of urls
|
25 |
|
26 | If you are just looking to take a giant list of URLs and turn it into some sitemaps, try out our CLI. The CLI can also parse, update, and validate existing sitemaps.
|
27 |
|
28 | ```sh
|
29 | npx sitemap < listofurls.txt # `npx sitemap -h` for more examples and a list of options.
|
30 | ```
|
31 |
|
32 | For programmatic one-time generation of a sitemap, try:
|
33 |
|
34 | ```js
|
35 | const { SitemapStream, streamToPromise } = require( 'sitemap' )
|
36 | const { Readable } = require( 'stream' )
|
37 |
|
38 | // An array with your links
|
39 | const links = [{ url: '/page-1/', changefreq: 'daily', priority: 0.3 }]
|
40 |
|
41 | // Create a stream to write to
|
42 | const stream = new SitemapStream( { hostname: 'https://...' } )
|
43 |
|
44 | // Return a promise that resolves with your XML string
|
45 | return streamToPromise(Readable.from(links).pipe(stream)).then((data) =>
|
46 | data.toString()
|
47 | )
|
48 | ```
|
49 |
|
50 | ## Serve a sitemap from a server and periodically update it
|
51 |
|
52 | Use this if you have fewer than 50,000 URLs. See SitemapAndIndexStream if you have more.
|
53 |
|
54 | ```js
|
55 | const express = require('express')
|
56 | const { SitemapStream, streamToPromise } = require('sitemap')
|
57 | const { createGzip } = require('zlib')
|
58 | const { Readable } = require('stream')
|
59 |
|
60 | const app = express()
|
61 | let sitemap
|
62 |
|
63 | app.get('/sitemap.xml', function(req, res) {
|
64 | res.header('Content-Type', 'application/xml');
|
65 | res.header('Content-Encoding', 'gzip');
|
66 | // if we have a cached entry send it
|
67 | if (sitemap) {
|
68 | res.send(sitemap)
|
69 | return
|
70 | }
|
71 |
|
72 | try {
|
73 | const smStream = new SitemapStream({ hostname: 'https://example.com/' })
|
74 | const pipeline = smStream.pipe(createGzip())
|
75 |
|
76 | // pipe your entries or directly write them.
|
77 | smStream.write({ url: '/page-1/', changefreq: 'daily', priority: 0.3 })
|
78 | smStream.write({ url: '/page-2/', changefreq: 'monthly', priority: 0.7 })
|
79 | smStream.write({ url: '/page-3/'}) // changefreq: 'weekly', priority: 0.5
|
80 | smStream.write({ url: '/page-4/', img: "http://urlTest.com" })
|
81 | /* or use
|
82 | Readable.from([{url: '/page-1'}...]).pipe(smStream)
|
83 | if you are looking to avoid writing your own loop.
|
84 | */
|
85 |
|
86 | // cache the response
|
87 | streamToPromise(pipeline).then(sm => sitemap = sm)
|
88 | // make sure to attach a write stream such as streamToPromise before ending
|
89 | smStream.end()
|
90 | // stream write the response
|
91 | pipeline.pipe(res).on('error', (e) => {throw e})
|
92 | } catch (e) {
|
93 | console.error(e)
|
94 | res.status(500).end()
|
95 | }
|
96 | })
|
97 |
|
98 | app.listen(3000, () => {
|
99 | console.log('listening')
|
100 | });
|
101 | ```
|
102 |
|
103 | ## Create sitemap and index files from one large list
|
104 |
|
105 | If you know you are definitely going to have more than 50,000 urls in your sitemap, you can use this slightly more complex interface to create a new sitemap every 45,000 entries and add that file to a sitemap index.
|
106 |
|
107 | ```js
|
108 | const { createReadStream, createWriteStream } = require('fs');
|
109 | const { resolve } = require('path');
|
110 | const { createGzip } = require('zlib')
|
111 | const {
|
112 | simpleSitemapAndIndex,
|
113 | lineSeparatedURLsToSitemapOptions
|
114 | } = require('sitemap')
|
115 |
|
116 | // writes sitemaps and index out to the destination you provide.
|
117 | simpleSitemapAndIndex({
|
118 | hostname: 'https://example.com',
|
119 | destinationDir: './',
|
120 | sourceData: lineSeparatedURLsToSitemapOptions(
|
121 | createReadStream('./your-data.json.txt')
|
122 | ),
|
123 | // or (only works with node 10.17 and up)
|
124 | sourceData: [{ url: '/page-1/', changefreq: 'daily'}, ...],
|
125 | // or
|
126 | sourceData: './your-data.json.txt',
|
127 | }).then(() => {
|
128 | // Do follow up actions
|
129 | })
|
130 | ```
|
131 |
|
132 | Want to customize that?
|
133 |
|
134 | ```js
|
135 | const { createReadStream, createWriteStream } = require('fs');
|
136 | const { resolve } = require('path');
|
137 | const { createGzip } = require('zlib')
|
138 | const { Readable } = require('stream')
|
139 | const {
|
140 | SitemapAndIndexStream,
|
141 | SitemapStream,
|
142 | lineSeparatedURLsToSitemapOptions
|
143 | } = require('sitemap')
|
144 |
|
145 | const sms = new SitemapAndIndexStream({
|
146 | limit: 50000, // defaults to 45k
|
147 | // SitemapAndIndexStream will call this user provided function every time
|
148 | // it needs to create a new sitemap file. You merely need to return a stream
|
149 | // for it to write the sitemap urls to and the expected url where that sitemap will be hosted
|
150 | getSitemapStream: (i) => {
|
151 | const sitemapStream = new SitemapStream({ hostname: 'https://example.com' });
|
152 | // if your server automatically serves sitemap.xml.gz when requesting sitemap.xml leave this line be
|
153 | // otherwise you will need to add .gz here and remove it a couple lines below so that both the index
|
154 | // and the actual file have a .gz extension
|
155 | const path = `./sitemap-${i}.xml`;
|
156 |
|
157 | sitemapStream
|
158 | .pipe(createGzip()) // compress the output of the sitemap
|
159 | .pipe(createWriteStream(resolve(path + '.gz'))); // write it to sitemap-NUMBER.xml.gz
|
160 |
|
161 | return [new URL(path, 'https://example.com/subdir/').toString(), sitemapStream];
|
162 | },
|
163 | });
|
164 |
|
165 | // when reading from a file
|
166 | lineSeparatedURLsToSitemapOptions(
|
167 | createReadStream('./your-data.json.txt')
|
168 | )
|
169 | .pipe(sms)
|
170 | .pipe(createGzip())
|
171 | .pipe(createWriteStream(resolve('./sitemap-index.xml.gz')));
|
172 |
|
173 | // or reading straight from an in-memory array
|
174 | sms
|
175 | .pipe(createGzip())
|
176 | .pipe(createWriteStream(resolve('./sitemap-index.xml.gz')));
|
177 |
|
178 | const arrayOfSitemapItems = [{ url: '/page-1/', changefreq: 'daily'}, ...]
|
179 | Readable.from(arrayOfSitemapItems).pipe(sms) // available as of node 10.17.0
|
180 | // or
|
181 | arrayOfSitemapItems.forEach(item => sms.write(item))
|
182 | sms.end() // necessary to let it know you've got nothing else to write
|
183 | ```
|
184 |
|
185 | ### Options you can pass
|
186 |
|
187 | ```js
|
188 | const { SitemapStream, streamToPromise } = require('sitemap');
|
189 | const smStream = new SitemapStream({
|
190 | hostname: 'http://www.mywebsite.com',
|
191 | xslUrl: "https://example.com/style.xsl",
|
192 | lastmodDateOnly: false, // print date not time
|
193 | xmlns: { // trim the xml namespace
|
194 | news: true, // flip to false to omit the xml namespace for news
|
195 | xhtml: true,
|
196 | image: true,
|
197 | video: true,
|
198 | custom: [
|
199 | 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"',
|
200 | 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
|
201 | ],
|
202 | }
|
203 | })
|
204 | // coalesce stream to value
|
205 | // alternatively you can pipe to another stream
|
206 | streamToPromise(smStream).then(console.log)
|
207 |
|
208 | smStream.write({
|
209 | url: '/page1',
|
210 | changefreq: 'weekly',
|
211 | priority: 0.8, // A hint to the crawler that it should prioritize this over items less than 0.8
|
212 | })
|
213 |
|
214 | // each sitemap entry supports many options
|
215 | // See [Sitemap Item Options](./api.md#sitemap-item-options) in the API docs for details
|
216 | smStream.write({
|
217 | url: 'http://test.com/page-1/',
|
218 | img: [
|
219 | {
|
220 | url: 'http://test.com/img1.jpg',
|
221 | caption: 'An image',
|
222 | title: 'The Title of Image One',
|
223 | geoLocation: 'London, United Kingdom',
|
224 | license: 'https://creativecommons.org/licenses/by/4.0/'
|
225 | },
|
226 | {
|
227 | url: 'http://test.com/img2.jpg',
|
228 | caption: 'Another image',
|
229 | title: 'The Title of Image Two',
|
230 | geoLocation: 'London, United Kingdom',
|
231 | license: 'https://creativecommons.org/licenses/by/4.0/'
|
232 | }
|
233 | ],
|
234 | video: [
|
235 | {
|
236 | thumbnail_loc: 'http://test.com/tmbn1.jpg',
|
237 | title: 'A video title',
|
238 | description: 'This is a video'
|
239 | },
|
240 | {
|
241 | thumbnail_loc: 'http://test.com/tmbn2.jpg',
|
242 | title: 'A video with an attribute',
|
243 | description: 'This is another video',
|
244 | 'player_loc': 'http://www.example.com/videoplayer.mp4?video=123',
|
245 | 'player_loc:autoplay': 'ap=1',
|
246 | 'player_loc:allow_embed': 'yes'
|
247 | }
|
248 | ],
|
249 | links: [
|
250 | { lang: 'en', url: 'http://test.com/page-1/' },
|
251 | { lang: 'ja', url: 'http://test.com/page-1/ja/' }
|
252 | ],
|
253 | androidLink: 'android-app://com.company.test/page-1/',
|
254 | news: {
|
255 | publication: {
|
256 | name: 'The Example Times',
|
257 | language: 'en'
|
258 | },
|
259 | genres: 'PressRelease, Blog',
|
260 | publication_date: '2008-12-23',
|
261 | title: 'Companies A, B in Merger Talks',
|
262 | keywords: 'business, merger, acquisition, A, B',
|
263 | stock_tickers: 'NASDAQ:A, NASDAQ:B'
|
264 | }
|
265 | })
|
266 | // indicate there is nothing left to write
|
267 | smStream.end()
|
268 | ```
|
269 |
|
270 | ## Examples
|
271 |
|
272 | For more examples see the [examples directory](./examples/)
|
273 |
|
274 | ## API
|
275 |
|
276 | Full API docs can be found [here](./api.md)
|
277 |
|
278 | ## Maintainers
|
279 |
|
280 | - [@ekalinin](https://github.com/ekalinin)
|
281 | - [@derduher](https://github.com/derduher)
|
282 |
|
283 | ## License
|
284 |
|
285 | See [LICENSE](https://github.com/ekalinin/sitemap.js/blob/master/LICENSE) file.
|