-
-
Notifications
You must be signed in to change notification settings - Fork 164
/
top_selector_pseudo_classes.sql
108 lines (100 loc) · 2.29 KB
/
top_selector_pseudo_classes.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#standardSQL
# Lists the selector component names found in a parsed stylesheet, grouped by
# component kind.
#
# css: JSON text of a stylesheet AST (it is handed straight to JSON.parse).
# Returns: a STRUCT with one array of names per kind (class, id, attribute,
#   pseudo_class, pseudo_element), or NULL when anything throws while parsing
#   or walking the input.
# NOTE(review): walkSelectors, parsel, incrementByKey and sortObject are not
# defined in this file; presumably they come from the css-utils.js library
# loaded below -- confirm.
CREATE TEMPORARY FUNCTION getSelectorParts(css STRING)
RETURNS STRUCT<
  class ARRAY<STRING>,
  id ARRAY<STRING>,
  attribute ARRAY<STRING>,
  pseudo_class ARRAY<STRING>,
  pseudo_element ARRAY<STRING>
>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  // Tally every selector node whose type is one of the tracked kinds.
  const tallyParts = (stylesheet) => {
    const tallies = {
      class: {},
      id: {},
      attribute: {},
      "pseudo-class": {},
      "pseudo-element": {}
    };

    walkSelectors(stylesheet, (selectorText) => {
      const selectorAst = parsel.parse(selectorText, {list: false});

      parsel.walk(selectorAst, (part) => {
        // Node types outside the tracked kinds are skipped.
        if (!(part.type in tallies)) {
          return;
        }
        incrementByKey(tallies[part.type], part.name);
      }, {subtree: true});
    });

    for (const kind in tallies) {
      tallies[kind] = sortObject(tallies[kind]);
    }

    return tallies;
  };

  // Keep only the names; entries whose tally is not numeric are dropped.
  const namesOf = (tally) => {
    const names = [];
    for (const [name, count] of Object.entries(tally)) {
      if (!isNaN(count)) {
        names.push(name);
      }
    }
    return names;
  };

  const parts = tallyParts(JSON.parse(css));

  return {
    class: namesOf(parts["class"]),
    id: namesOf(parts["id"]),
    attribute: namesOf(parts["attribute"]),
    pseudo_class: namesOf(parts["pseudo-class"]),
    pseudo_element: namesOf(parts["pseudo-element"])
  };
} catch (e) {
  // Any failure (bad JSON, unparsable selector, ...) yields a NULL struct.
  return null;
}
''';
# For each client, reports the most frequent CSS pseudo-classes (up to 100,
# via APPROX_TOP_COUNT) together with the number and share of pages using them.
WITH totals AS (
  # Row count per client across the `2022_07_01_*` summary_pages tables; the
  # client is taken from the table suffix.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_pages
  FROM
    `httparchive.summary_pages.2022_07_01_*` -- noqa: L062
  GROUP BY
    client
),

page_pseudo_classes AS (
  # One row per distinct (client, page, pseudo_class). The LEFT JOIN keeps a
  # stylesheet row even when the UDF yields no pseudo-classes (pseudo_class is
  # then NULL), so such pages still count toward `pages` downstream.
  SELECT DISTINCT
    client,
    page,
    pseudo_class
  FROM
    `httparchive.almanac.parsed_css`
  LEFT JOIN
    UNNEST(getSelectorParts(css).pseudo_class) AS pseudo_class
  WHERE
    date = '2022-07-01' AND
    # Limit the size of the CSS to avoid OOM crashes.
    LENGTH(css) < 0.1 * 1024 * 1024
),

client_stats AS (
  # Per-client page counts plus the approximate top 100 pseudo-classes.
  SELECT
    client,
    COUNT(DISTINCT page) AS pages,
    ANY_VALUE(total_pages) AS total_pages,
    COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
    APPROX_TOP_COUNT(pseudo_class, 100) AS pseudo_classes
  FROM
    page_pseudo_classes
  INNER JOIN
    totals
  USING
    (client)
  GROUP BY
    client
)

SELECT
  client,
  pages,
  total_pages,
  pct_pages,
  top_entry.value AS pseudo_class,
  top_entry.count AS freq,
  top_entry.count / pages AS pct
FROM
  client_stats
CROSS JOIN
  UNNEST(pseudo_classes) AS top_entry
WHERE
  # Drop the NULL bucket produced by rows that had no pseudo-class.
  top_entry.value IS NOT NULL
ORDER BY
  pct DESC