1 | const get = require( __dirname + '/modules/get' )
|
2 | const extract = require( __dirname + '/modules/link-extract' )
|
3 | const clean = require( __dirname + '/modules/cleanlinks' )
|
4 | const restructure = require( __dirname + '/modules/link-restructure' )
|
5 |
|
/**
 * Checks a list of links, then crawls the pages that loaded and checks the
 * links found on them.
 *
 * Pipeline:
 *   1. `clean( base, links )` resolves to the list of links to request.
 *   2. Every cleaned link is requested with `get`; a rejected request is
 *      recorded in `broken.top` (the rejection value itself is stored).
 *   3. Every page that resolved with an `html` property is passed to
 *      `extract( base, page.url, page.html )`.
 *   4. The extraction results go through `restructure`, whose output is
 *      stored as `broken.allchecked` and must be an array of objects
 *      exposing a `link` property.
 *   5. Every extracted link is requested with `get`; the entries whose
 *      request rejects are recorded in `broken.crawled`.
 *
 * @param {*} base  - Forwarded untouched to `clean` and `extract`.
 * @param {*} links - Forwarded untouched to `clean`.
 * @returns {Promise<{top: Array, crawled: Array, allchecked: Array}|undefined>}
 *   Resolves with the report. The promise never rejects: an unexpected
 *   failure anywhere in the pipeline is logged and the promise resolves with
 *   `undefined`, so callers should guard against a missing report.
 */
const checker = ( base, links ) => {
	// Read the flag once per run instead of at every log site.
	const debug = process.env.debug

	const broken = {
		top: [],
		crawled: [],
		// Declared up front so the report shape does not depend on how far the run got.
		allchecked: []
	}

	return clean( base, links )

		.then( cleanlinks => Promise.all( cleanlinks.map( link => {
			if ( debug ) console.log( `Making GET promise for ${ link }` )

			// A failed request resolves to undefined here so Promise.all keeps going.
			return get( link ).catch( brokentop => {
				if ( debug ) console.log( 'broken links found' )
				broken.top.push( brokentop )
			} )
		} ) ) )

		.then( urlbodies => {
			// Skip failed requests (undefined) and responses without a body.
			const pages = urlbodies.filter( page => page != null && page.html != null )

			return Promise.all( pages.map( page => {
				if ( debug ) console.log( `Page extract for ${ page.url }` )
				return extract( base, page.url, page.html ).catch( console.log.bind( console ) )
			} ) )
		} )

		// A failed extraction was logged above and resolved to undefined; drop it
		// so one unparsable page cannot leak an undefined entry into the link list.
		.then( extracted => extracted.filter( found => found !== undefined ) )

		.then( restructure )

		.then( linksfromurls => {
			broken.allchecked = linksfromurls
			if ( debug ) console.log( 'Scanning links extracted from url pages' )

			return Promise.all( linksfromurls.map( thislink =>
				get( thislink.link ).catch( () => broken.crawled.push( thislink ) )
			) )
		} )

		.then( () => broken )

		// Best effort: log unexpected failures instead of rejecting.
		.catch( console.log.bind( console ) )
}
|
56 |
|
57 | module.exports = checker |
\ | No newline at end of file |