UNPKG

8.63 kBJavaScriptView Raw
1/* eslint-disable import/no-extraneous-dependencies */
2import fs from 'fs';
3import { gzipSync } from 'zlib';
4
5import jsonStableStringify from 'json-stable-stringify';
6import gaApi from 'ga-api';
7import moment from 'moment';
8import random from 'random';
9
10
// The special timestamp-based session ID dimension. It is requested alongside
// every group of dimensions so that the separate result sets can be joined.
const sessionIdDimension = 'ga:dimension10';

// Custom dimensions, see: https://intoli.com/blog/user-agents/
// Keys are the Google Analytics dimension names sent in API requests; values
// are the property names used on the parsed session objects. The key order
// matters because it determines how dimensions are grouped into requests.
// NOTE(review): `ga:dimension4` is not listed here - confirm that is intended.
const customDimensionMap = {
  'ga:dimension1': 'appName',
  'ga:dimension2': 'connection',
  'ga:dimension3': 'cpuClass',
  'ga:dimension5': 'oscpu',
  'ga:dimension6': 'platform',
  'ga:dimension7': 'pluginsLength',
  'ga:dimension8': 'vendor',
  'ga:dimension9': 'userAgent',
};
24
// Standard (built-in) Google Analytics dimensions. Keys are the dimension
// names sent in API requests; values are the names they are known by when
// sessions are parsed.
const standardDimensionMap = {
  'ga:browserSize': 'browserSize',
  'ga:deviceCategory': 'deviceCategory',
  'ga:screenResolution': 'screenResolution',
};
31
32
// These primarily help map missing data to `null`/`undefined` properly.
/**
 * Normalizes a raw custom-dimension value.
 *
 * The literal strings `'null'` and `'undefined'` are converted to the real
 * `null`/`undefined` values, and the same conversion is applied recursively
 * to every element of an array and every property of an object.
 *
 * @param {*} value - The raw value to normalize.
 * @param {boolean} [json=false] - When true and `value` is truthy, `value` is
 *   first parsed as JSON and the parsed result is normalized.
 * @returns {*} The normalized value, or `null` when JSON parsing fails (the
 *   failure is logged rather than thrown).
 */
const parseCustomDimension = (value, json = false) => {
  if (value === 'null') {
    return null;
  }
  if (value === 'undefined') {
    return undefined;
  }
  if (json && value) {
    try {
      return parseCustomDimension(JSON.parse(value));
    } catch (error) {
      // eslint-disable-next-line no-console
      console.error(`Error parsing "${value}" as JSON.`, error);
      return null;
    }
  }
  if (Array.isArray(value)) {
    // Arrays must be handled before the generic object branch; otherwise they
    // would be rebuilt as plain objects keyed by index. An explicit arrow is
    // used so that `map`'s index argument is not mistaken for the `json` flag.
    return value.map(childValue => parseCustomDimension(childValue));
  }
  if (typeof value === 'object' && value !== null) {
    const parsedObject = {};
    Object.entries(value).forEach(([key, childValue]) => {
      parsedObject[key] = parseCustomDimension(childValue);
    });
    return parsedObject;
  }
  return value;
};
58
/**
 * Maps the `'(not set)'` placeholder string to `null`.
 *
 * @param {*} value - A raw standard-dimension value.
 * @returns {*} `null` for `'(not set)'`, otherwise `value` unchanged.
 */
const parseStandardDimension = (value) => {
  if (value === '(not set)') {
    return null;
  }
  return value;
};
62
63
/**
 * Requests one page of session rows for the given dimensions via `gaApi`.
 *
 * The reporting window ends today and starts `MAXIMUM_AGE` days ago (read
 * from the environment, defaulting to 1), i.e. the last 24-48 hours by
 * default. Results are sorted by the session ID dimension.
 *
 * @param {string[]} dimensions - Dimension names to request.
 * @param {number} [page=0] - Zero-based page number.
 * @returns {Promise<*>} Resolves with the `rows` property of the response and
 *   rejects with the error reported to the callback.
 */
const fetchAnalyticsRows = (dimensions, page = 0) => new Promise((resolve, reject) => {
  const dateFormat = 'YYYY-MM-DD';
  const windowInDays = parseInt(process.env.MAXIMUM_AGE || 1, 10);
  // This is the maximum page size allowed by the API.
  const pageSize = 10000;

  const request = {
    // Credential details.
    clientId: 'user-agents-npm-package-update.apps.googleusercontent.com',
    email: 'user-agents-npm-package-update@user-agents-npm-package.iam.gserviceaccount.com',
    key: 'google-analytics-credentials.json',
    ids: 'ga:115995502',
    // Request details.
    endDate: moment().format(dateFormat),
    dimensions: dimensions.join(','),
    maxResults: pageSize,
    metrics: 'ga:sessions',
    sort: sessionIdDimension,
    startDate: moment().subtract(windowInDays, 'days').format(dateFormat),
    // Result indices are one-based.
    startIndex: 1 + (page * pageSize),
  };

  const onResponse = (error, data) => (error ? reject(error) : resolve(data.rows));

  gaApi(request, onResponse, { cacheDir: '.' });
});
95
96
/**
 * Downloads every configured dimension for every session and joins the
 * results by session ID.
 *
 * @returns {Promise<Object>} An object keyed by session ID whose values map
 *   raw dimension names to raw values. A session is dropped unless the number
 *   of rows seen for it equals the number of dimension groups.
 */
const getRawSessions = async () => {
  // At most 7 dimensions can be requested at once and one slot is always
  // taken by the session ID, so the remaining dimensions go out in chunks of 6.
  const maximumDimensionsPerRequest = 7;
  const chunkSize = maximumDimensionsPerRequest - 1;
  const allDimensions = [
    ...Object.keys(customDimensionMap),
    ...Object.keys(standardDimensionMap),
  ];
  const requestGroups = [];
  for (let offset = 0; offset < allDimensions.length; offset += chunkSize) {
    requestGroups.push([sessionIdDimension, ...allDimensions.slice(offset, offset + chunkSize)]);
  }

  const sessions = {};
  // Number of rows seen per session ID, summed over all groups and pages.
  const rowCounts = {};

  // Copies one result row into the joined session record. The first column is
  // the session ID and the last one is the session count metric; both are
  // skipped when copying.
  const mergeRow = (row, group) => {
    const [sessionId] = row;
    rowCounts[sessionId] = (rowCounts[sessionId] || 0) + 1;

    const record = sessions[sessionId] || {};
    sessions[sessionId] = record;
    for (let column = 1; column < row.length - 1; column += 1) {
      record[group[column]] = row[column];
    }
  };

  // Walk the pages, requesting every group for each page, until a page comes
  // back empty for all of the groups.
  let page = 0;
  let receivedRows = true;
  while (receivedRows) {
    receivedRows = false;
    for (let groupIndex = 0; groupIndex < requestGroups.length; groupIndex += 1) {
      const group = requestGroups[groupIndex];
      const rows = (await fetchAnalyticsRows(group, page)) || [];
      if (rows.length > 0) {
        receivedRows = true;
      }
      rows.forEach(row => mergeRow(row, group));
    }
    page += 1;
  }

  // Delete any partial data.
  Object.keys(sessions)
    .filter(sessionId => rowCounts[sessionId] !== requestGroups.length)
    .forEach((sessionId) => {
      delete sessions[sessionId];
    });

  return sessions;
};
147
148
/**
 * Converts raw sessions (raw dimension name -> raw string) into session
 * objects with friendly property names and parsed values.
 *
 * @param {Object} rawSessions - Raw sessions keyed by session ID.
 * @returns {Object} Parsed sessions keyed by the same session IDs.
 */
const parseSessions = (rawSessions) => {
  // Numeric `connection` fields and the converter applied to each of them.
  const connectionConverters = [
    ['rtt', text => parseInt(text, 10)],
    ['downlink', text => parseFloat(text)],
    ['downlinkMax', text => parseFloat(text)],
  ];

  const parsedSessions = {};
  Object.keys(rawSessions).forEach((sessionId) => {
    const rawSession = rawSessions[sessionId];
    // The timestamp is the part of the session ID before the first dash.
    const [timestampText] = sessionId.split('-');
    const session = { timestamp: parseInt(timestampText, 10) };

    Object.keys(customDimensionMap).forEach((rawName) => {
      const property = customDimensionMap[rawName];
      const rawValue = rawSession[rawName];

      if (property === 'connection') {
        // The connection details arrive JSON-encoded.
        const connection = parseCustomDimension(rawValue, true);
        session[property] = connection;
        if (connection) {
          connectionConverters.forEach(([field, toNumber]) => {
            if (connection[field]) {
              connection[field] = toNumber(connection[field]);
            }
          });
        }
      } else if (property === 'pluginsLength') {
        session[property] = parseInt(parseCustomDimension(rawValue, false), 10);
      } else {
        session[property] = parseCustomDimension(rawValue, false);
      }
    });

    Object.keys(standardDimensionMap).forEach((rawName) => {
      const property = standardDimensionMap[rawName];
      const value = parseStandardDimension(rawSession[rawName]);

      if (property !== 'browserSize' && property !== 'screenResolution') {
        session[property] = value;
        return;
      }

      // Sizes look like "<width>x<height>" and are split into two numeric
      // properties; anything else leaves both of them as `null`.
      let width = null;
      let height = null;
      if (/\d+x\d+/.test(value)) {
        [width, height] = value.split('x').map(pixels => parseInt(pixels, 10));
      }
      const prefix = property === 'browserSize' ? 'viewport' : 'screen';
      session[`${prefix}Height`] = height;
      session[`${prefix}Width`] = width;
    });

    parsedSessions[sessionId] = session;
  });

  return parsedSessions;
};
197
198
/**
 * Builds the weighted table of browser fingerprints.
 *
 * Sessions are fetched and parsed, headless browsers are discarded, identical
 * fingerprints are merged while counting their occurrences, and the counts
 * are then turned into randomized weights that sum to 1.
 *
 * @returns {Promise<Object[]>} Fingerprint objects (without `timestamp`), each
 *   with a `weight` property, sorted by decreasing weight.
 */
const getUserAgentTable = async () => {
  // Fetch the sessions and process them into parsed objects.
  const rawSessions = await getRawSessions();
  const sessions = parseSessions(rawSessions);

  // Calculate the number of occurrences of each unique fingerprint.
  const uniqueSessions = {};
  Object.values(sessions).forEach((session) => {
    // Exclude headless browser user agents.
    if (/headless/i.test(session.userAgent)) {
      return;
    }

    // The timestamp is not part of the fingerprint. It has to be left out of
    // the key as well as out of the stored entry; otherwise identical
    // fingerprints recorded at different times would never be merged.
    const { timestamp, ...fingerprint } = session;
    const uniqueKey = jsonStableStringify(fingerprint);
    if (!uniqueSessions[uniqueKey]) {
      uniqueSessions[uniqueKey] = {
        ...fingerprint,
        weight: 0,
      };
    }
    uniqueSessions[uniqueKey].weight += 1;
  });

  // Replace each occurrence count `k` with a random draw: half the sum of
  // `2k` squared standard normal samples, which has mean `k`.
  // NOTE(review): presumably this keeps exact counts out of the published
  // data - confirm the intent.
  const sampleStandardNormal = random.normal();
  let totalWeight = 0;
  Object.values(uniqueSessions).forEach((session) => {
    const sampleCount = 2 * session.weight;
    let sumOfSquares = 0;
    for (let draw = 0; draw < sampleCount; draw += 1) {
      sumOfSquares += sampleStandardNormal() ** 2;
    }
    // eslint-disable-next-line no-param-reassign
    session.weight = sumOfSquares / 2;
    totalWeight += session.weight;
  });

  // Normalize the weights so that they sum to 1.
  Object.values(uniqueSessions).forEach((session) => {
    // eslint-disable-next-line no-param-reassign
    session.weight /= totalWeight;
  });

  // Sort them by decreasing weight.
  const sessionList = Object.values(uniqueSessions);
  sessionList.sort((a, b) => b.weight - a.weight);

  return sessionList;
};
243
244
// Command-line entry point; skipped when `module.parent` is set.
// NOTE(review): `module.parent` is a CommonJS feature, so this presumably
// relies on the file being transpiled before it runs - confirm.
if (!module.parent) {
  const [, , filename] = process.argv;
  if (!filename) {
    throw new Error('An output filename must be passed as an argument to the command.');
  }

  // Pretty-prints the table as JSON, gzipping it if the extension ends with `.gz`.
  const serialize = (userAgents) => {
    const json = JSON.stringify(userAgents, null, 2);
    return filename.endsWith('.gz') ? gzipSync(json) : json;
  };

  getUserAgentTable()
    .then((userAgents) => {
      fs.writeFileSync(filename, serialize(userAgents));
    })
    .catch((error) => {
      // Report the failure and exit with a non-zero status.
      // eslint-disable-next-line no-console
      console.error(error);
      process.exit(1);
    });
}


export default getUserAgentTable;
266export default getUserAgentTable;