1 | through2 = require "through2"
|
2 | Promise = require "bluebird"
|
3 | req = Promise.promisifyAll(require "request")
|
4 | cheerio = require "cheerio"
|
5 | url = require "url"
|
6 | gutil = require "gulp-util"
|
7 | _ = require "lodash"
|
8 |
|
9 |
|
10 |
|
# Delay between two polls while a flush waits for outstanding pads (ms).
POLL_INTERVAL_MS = 100

# Maximum number of characters taken from the paragraph text for a teaser.
TEASER_MAX_LENGTH = 255

# Private helper shared by the `registry` and `loadPads` flush handlers.
# Calls `done` as soon as `Inkpad.allLoaded pads` is true, re-checking every
# POLL_INTERVAL_MS milliseconds until then.
#
# pads - object mapping pad id -> pad
# done - callback invoked (without arguments) once every pad has settled
waitForPads = (pads, done) ->
  check = ->
    if Inkpad.allLoaded pads
      done()
    else
      setTimeout check, POLL_INTERVAL_MS
  check()

# Factories for object-mode through2 streams that discover, download and
# post-process Inkpad documents ("pads"). A pad is a plain object carrying at
# least an `id`; the individual stages add further properties to it.
Inkpad =

  # Returns a stream that takes file objects and pushes one `{id}` object for
  # every match of the inkpad id pattern found in the file's contents.
  extractIds: ->
    through2.obj (file, enc, done) ->
      # Files without contents have nothing to scan; previously this threw.
      text = if file.contents? then file.contents.toString() else ""
      # The regex is created per file so the /g `lastIndex` always starts at 0.
      pattern = /inkpads.*([a-z0-9]{10})/ig
      while (found = pattern.exec text)?
        @push id: found[1]
      done()

  # Returns true when every pad in `pads` has settled, i.e. is flagged either
  # `loaded` or `failed`; false as soon as one pending pad is found.
  # An empty `pads` object counts as settled.
  #
  # pads - object mapping pad id -> pad
  allLoaded: (pads) ->
    for id, pad of pads
      # `failed` is set by `loadPads` when a request errors; treating it as
      # settled keeps the flush pollers from waiting forever.
      return false unless pad.loaded or pad.failed
    true

  # Returns a de-duplicating stream: a pad is recorded (with `loaded = false`)
  # and pushed downstream only the first time its id is seen. The returned
  # stream also exposes `addPad(pad)` to inject pads from outside the pipe.
  # The stream does not finish until all recorded pads have settled.
  # NOTE(review): nothing in this stage sets `loaded`; presumably a downstream
  # `loadPads` stage flips it on the same pad objects - confirm.
  registry: ->
    pads = {}

    transform = (pad, enc, done) ->
      unless pads[pad.id]
        pad.loaded = false
        pads[pad.id] = pad
        @push pad
      done()

    flush = (done) -> waitForPads pads, done

    r = through2.obj transform, flush
    # Runs `pad` through the same de-duplication as piped pads, then copies
    # its `path` onto the registered pad (also when the id was already known).
    r.addPad = (pad) ->
      transform.call r, pad, null, ->
        pads[pad.id].path = pad.path
    r

  # Returns a stream that requests https://www.inkpad.io/<id> for every
  # incoming pad. On success the response body is stored in `pad.contents`,
  # the pad is flagged `loaded` and pushed downstream. On failure the pad is
  # flagged `failed` and an 'error' event carrying a gutil.PluginError is
  # emitted on the stream. The stream does not finish until every pad settled.
  loadPads: ->
    pads = {}

    transform = (pad, enc, done) ->
      id = pad.id
      pad.loaded = false
      pads[id] = pad

      uri = "https://www.inkpad.io/#{id}"
      gutil.log "[inkpad] Loading #{uri}"

      self = @
      pending = req.getAsync(uri).spread (resp, body) ->
        pad.contents = body
        pad.loaded = true
        self.push pad

      # Without this handler a failed request left the pad pending forever and
      # the flush pollers never called back, hanging the whole pipeline.
      pending.catch (err) ->
        pad.failed = true
        self.emit "error", new gutil.PluginError("inkpad", "Failed to load #{uri}: #{err.message}")

      # Acknowledge right away so requests run concurrently; completion is
      # awaited in `flush`.
      done()

    flush = (done) -> waitForPads pads, done

    through2.obj transform, flush

  # Returns a stream that rewrites links to other pads inside `pad.contents`.
  # For every anchor whose href contains "inkpad.io", whose path starts with a
  # 10-character id and whose `title` starts with "/": the href is replaced by
  # the title, the title attribute is removed, the id is appended to
  # `pad.linkedInkpads` and `{id, path}` is handed to `reg.addPad`.
  #
  # reg - object exposing `addPad(pad)` (e.g. the stream from `registry`)
  scanForSubPages: (reg) ->

    transform = (pad, enc, done) ->
      $ = cheerio.load(pad.contents)
      $('a[href*="inkpad.io"]').each ->
        link = $(@)
        relPath = link.attr 'title'
        uri = link.attr 'href'
        # `pathname` is null for hrefs without a path (e.g. mailto: links).
        match = url.parse(uri).pathname?.match(/^\/([a-z0-9]{10})/i)

        if relPath?.charAt(0) is "/" and match
          id = match[1]
          link.attr 'href', relPath
          link.removeAttr 'title'
          pad.linkedInkpads ||= []
          pad.linkedInkpads.push id
          reg.addPad id: id, path: relPath

        # Explicit return: a `false` result would stop cheerio's iteration.
        return

      pad.contents = $.html()
      @push pad
      done()

    through2.obj transform

  # Returns a stream that reduces `pad.contents` to the inner HTML of the
  # ".markdown-body" element and copies it to `pad.normalizedContents`.
  slicePads: ->

    transform = (pad, enc, done) ->
      $ = cheerio.load(pad.contents)
      pad.contents = $(".markdown-body").html()
      pad.normalizedContents = pad.contents
      @push pad
      done()

    through2.obj transform

  # Returns a stream that stores the text of the first heading (h1-h6) of
  # `pad.normalizedContents` in `pad.title` and removes that heading from
  # `pad.normalizedContents`.
  extractTitle: ->

    transform = (pad, enc, done) ->
      $ = cheerio.load(pad.normalizedContents)
      titleEl = $("h1,h2,h3,h4,h5,h6").first()
      pad.title = titleEl.text()
      titleEl.remove()
      pad.normalizedContents = $.html()
      @push pad
      done()

    through2.obj transform

  # Returns a stream that reads the first <time> element of
  # `pad.normalizedContents` (its `datetime` attribute, else its text). When a
  # value is found it is stored as a Date in `pad.timestamp` and the element
  # is removed from `pad.normalizedContents`.
  extractTimestamp: ->

    transform = (pad, enc, done) ->
      $ = cheerio.load(pad.normalizedContents)
      el = $("time").first()
      time = el.attr("datetime") or el.text()
      if time
        # An unparsable string yields an Invalid Date rather than an error.
        pad.timestamp = new Date(time)
        el.remove()
        pad.normalizedContents = $.html()
      @push pad
      done()

    through2.obj transform

  # Returns a stream that looks for the first <img> whose `alt` contains
  # "header". When one exists, its `src` is stored in `pad.headerImageUrl`
  # and the image is removed from `pad.normalizedContents`.
  extractHeaderImage: ->

    transform = (pad, enc, done) ->
      $ = cheerio.load(pad.normalizedContents)

      el = $('img[alt*="header" i]').first()
      # A cheerio selection is always truthy; test `length` to detect a match.
      if el.length
        pad.headerImageUrl = el.attr("src")
        el.remove()
        pad.normalizedContents = $.html()
      @push pad
      done()

    through2.obj transform

  # Returns a stream that stores a plain-text teaser in `pad.teaser`: the
  # combined text of all <p> elements, cut to TEASER_MAX_LENGTH characters.
  extractTeaser: ->

    transform = (pad, enc, done) ->
      $ = cheerio.load(pad.normalizedContents)
      fullText = $("p").text()
      teaser = fullText.substring(0, TEASER_MAX_LENGTH)
      # Only a truncated teaser can end in a cut-off word; stripping the last
      # word unconditionally used to eat the final word of short texts.
      if fullText.length > TEASER_MAX_LENGTH
        teaser = teaser.replace(/\s\w+$/, '')
      pad.teaser = teaser
      @push pad
      done()

    through2.obj transform

module.exports = Inkpad
|
191 |
|