Skip to content

Commit

Permalink
CSS 2022 queries (#2937)
Browse files Browse the repository at this point in the history
* Update README.md

* usage

* batch copy of 2021 queries

* rm helper script

* add bq helper script

* totals

* css

* lint

* rm helper

* nit

* fix

* use 2022-06 for image queries

* updates

* additional queries

* fixes

* linter

* linter

* filter fn

* filter, clip path, blend mode

* linter

* print styles

* print @page fixes

* couple more fixes

* color alpha functions

* more colors

* last time

* supports, keyframes, flex-basis
  • Loading branch information
rviscomi committed Aug 13, 2022
1 parent b3cc535 commit 744659f
Show file tree
Hide file tree
Showing 92 changed files with 7,919 additions and 9 deletions.
9 changes: 0 additions & 9 deletions sql/2022/css/README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
# 2022 CSS queries

<!--
This directory contains all of the 2022 CSS chapter queries.
Each query should have a corresponding `metric_name.sql` file.
Note that readers are linked to this directory, so try to make the SQL file names descriptive for easy browsing.
Analysts: if helpful, you can use this README to give additional info about the queries.
-->

## Resources

- [📄 Planning doc][~google-doc]
Expand Down
70 changes: 70 additions & 0 deletions sql/2022/css/all_properties.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#standardSQL
# Returns one array element per declaration found in the parsed stylesheet,
# each element being the declared property name. Names are lower-cased except
# for custom properties (those starting with "--"). Elements are emitted
# grouped by property, in the order produced by sortObject().
# Returns an empty array when the input cannot be parsed or walked.
CREATE TEMPORARY FUNCTION getProperties(css STRING)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  const tally = {};

  walkDeclarations(JSON.parse(css), ({property}) => {
    // Custom props are case sensitive, so only normalize the regular ones.
    const key = property.startsWith("--") ? property : property.toLowerCase();
    incrementByKey(tally, key);
  });

  // Expand {name: count} back into `count` copies of `name`.
  return Object.entries(sortObject(tally)).flatMap(
    ([name, count]) => Array(count).fill(name)
  );
} catch (e) {
  return [];
}
''';

# Adoption of every CSS property: number of pages using it and its share of
# all declarations, per client. Only properties seen on at least 1000 pages
# are reported; the share denominators still include the rarer properties.
WITH totals AS (
  # Number of pages in the 2022-07-01 crawl, per client.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_pages
  FROM
    `httparchive.summary_pages.2022_07_01_*`
  GROUP BY
    client
),

property_usage AS (
  # One row per (client, property).
  SELECT
    client,
    prop,
    COUNT(DISTINCT page) AS pages,
    ANY_VALUE(total_pages) AS total_pages,
    COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
    COUNT(0) AS freq,
    SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
    COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
  FROM
    `httparchive.almanac.parsed_css`
  CROSS JOIN
    UNNEST(getProperties(css)) AS prop
  INNER JOIN
    totals
  USING
    (client)
  WHERE
    date = '2022-07-01'
  GROUP BY
    client,
    prop
)

SELECT
  client,
  prop,
  pages,
  total_pages,
  pct_pages,
  freq,
  total,
  pct
FROM
  property_usage
WHERE
  pages >= 1000
ORDER BY
  pct DESC
39 changes: 39 additions & 0 deletions sql/2022/css/box_sizing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#standardSQL
# 1. Distribution of the number of occurrences of box-sizing:border-box per page.
# 2. Percent of pages with that style.
# Counts the declarations in one parsed stylesheet whose property is
# box-sizing (optionally with an -o-/-moz-/-webkit-/-ms- prefix) and whose
# value matches 'border-box', as decided by countDeclarations() from
# css-utils.js. Returns NULL when the stylesheet cannot be parsed or counted.
CREATE TEMPORARY FUNCTION countBorderBoxDeclarations(css STRING)
RETURNS NUMERIC
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  const {stylesheet} = JSON.parse(css);
  const borderBoxFilter = {
    properties: /^(-(o|moz|webkit|ms)-)?box-sizing$/,
    values: 'border-box'
  };
  return countDeclarations(stylesheet.rules, borderBoxFilter);
} catch (e) {
  return null;
}
''';

# For each client and percentile (10, 25, 50, 75, 90) reports:
#   pages                  - pages with at least one box-sizing:border-box declaration
#   total                  - all pages that have parsed CSS
#   pct_pages              - pages / total
#   declarations_per_page  - the given percentile of the per-page declaration count
#
# parsed_css holds one row per stylesheet, so the counts are summed per page
# before the quantiles are taken. Without that aggregation the distribution
# would describe stylesheets, not pages. SUM skips stylesheets whose count is
# NULL (unparseable); a page with only NULL counts stays NULL and is dropped
# from the quantiles by IGNORE NULLS.
SELECT
  percentile,
  client,
  COUNT(DISTINCT IF(declarations > 0, page, NULL)) AS pages,
  COUNT(DISTINCT page) AS total,
  COUNT(DISTINCT IF(declarations > 0, page, NULL)) / COUNT(DISTINCT page) AS pct_pages,
  APPROX_QUANTILES(declarations, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS declarations_per_page
FROM (
  SELECT
    client,
    page,
    SUM(countBorderBoxDeclarations(css)) AS declarations
  FROM
    `httparchive.almanac.parsed_css`
  WHERE
    date = '2022-07-01'
  GROUP BY
    client,
    page),
  # Repeat every page row once per requested percentile.
  UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
  percentile,
  client
ORDER BY
  percentile,
  client
65 changes: 65 additions & 0 deletions sql/2022/css/box_sizing_border_box_selectors.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#standardSQL
# Top selectors used with box-sizing: border-box
# Returns the selectors of every style rule in the parsed stylesheet that has
# a box-sizing declaration (optionally vendor-prefixed) whose value contains
# "border-box". Rules nested inside grouping at-rules (any node that carries
# its own `rules` array, e.g. @media or @supports) are included as well.
# Nodes without selectors or without a declarations array are skipped.
# Returns an empty array when the input is not a parseable stylesheet AST.
CREATE TEMP FUNCTION
getBorderBoxSelectors(css STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
try {
  const boxSizingPattern = /^(-(o|moz|webkit|ms)-)?box-sizing$/;
  const borderBoxPattern = /border-box/;

  function collectSelectors(rules) {
    return rules.flatMap(rule => {
      // Grouping at-rules hold their style rules one level down.
      if (Array.isArray(rule.rules)) {
        return collectSelectors(rule.rules);
      }

      // Skip nodes that are not style rules instead of failing the whole sheet.
      if (!rule.selectors || !Array.isArray(rule.declarations)) {
        return [];
      }

      const setsBorderBox = rule.declarations.some(d => {
        return boxSizingPattern.test(d.property) && borderBoxPattern.test(d.value);
      });

      return setsBorderBox ? rule.selectors : [];
    });
  }

  return collectSelectors(JSON.parse(css).stylesheet.rules);
} catch (e) {
  return [];
}
''';
# Top 1000 selectors (by share of all matching selectors per client) that are
# used on rules setting box-sizing: border-box.
WITH totals AS (
  # Number of pages in the 2022-07-01 crawl, per client.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_pages
  FROM
    `httparchive.summary_pages.2022_07_01_*`
  GROUP BY
    client
),

border_box_selectors AS (
  # One row per selector occurrence.
  SELECT
    client,
    page,
    selector
  FROM
    `httparchive.almanac.parsed_css`
  CROSS JOIN
    UNNEST(getBorderBoxSelectors(css)) AS selector
  WHERE
    date = '2022-07-01'
)

SELECT
  client,
  selector,
  COUNT(DISTINCT page) AS pages,
  ANY_VALUE(total_pages) AS total_pages,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
  border_box_selectors
INNER JOIN
  totals
USING
  (client)
GROUP BY
  client,
  selector
ORDER BY
  pct DESC
LIMIT 1000
90 changes: 90 additions & 0 deletions sql/2022/css/calc_complexity_units.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#standardSQL
# For one parsed stylesheet, returns a histogram of calc() unit complexity:
#   num  - how many distinct unit tokens a single calc() call contains
#   freq - how many calc() calls in the stylesheet have that many
# Only declarations whose value contains "calc(" and does not contain "var(--"
# are inspected. Returns an empty array when the stylesheet cannot be processed.
#
# Only this one histogram is built; the query uses nothing else, and the UDF
# runs on inputs large enough that the caller already caps stylesheet size.
CREATE TEMPORARY FUNCTION getCalcUnitComplexity(css STRING)
RETURNS ARRAY<STRUCT<num INT64, freq INT64>>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  function compute(ast) {
    // Maps "distinct unit tokens in one calc()" to the number of such calls.
    let numberOfDifferentUnits = {};

    walkDeclarations(ast, ({value}) => {
      for (let calc of extractFunctionCalls(value, {names: "calc"})) {
        // Treat nested calc() as plain parentheses so "calc" is not read as a unit.
        let args = calc.args.replace(/calc\\(/g, "(");

        // Every run of lowercase letters, or a percent sign, counts as a unit token.
        let units = args.match(/[a-z]+|%/g) || [];

        incrementByKey(numberOfDifferentUnits, new Set(units).size);
      }
    }, {
      values: /calc\\(/,
      not: {
        values: /var\\(--/
      }
    });

    return sortObject(numberOfDifferentUnits);
  }

  var ast = JSON.parse(css);
  var histogram = compute(ast);

  // Object keys are strings; BigQuery accepts a numeric string for an INT64 field.
  return Object.entries(histogram).map(([num, freq]) => ({num, freq}));
} catch (e) {
  return [];
}
''';

# Distribution of the number of distinct units used per calc() call, per client.
WITH calc_unit_counts AS (
  # One row per (stylesheet, number of distinct units).
  SELECT
    client,
    unit.num,
    unit.freq
  FROM
    `httparchive.almanac.parsed_css`
  CROSS JOIN
    UNNEST(getCalcUnitComplexity(css)) AS unit
  WHERE
    date = '2022-07-01' AND
    # Limit the size of the CSS to avoid OOM crashes.
    LENGTH(css) < 0.1 * 1024 * 1024
)

SELECT
  client,
  num,
  SUM(freq) AS freq,
  SUM(SUM(freq)) OVER (PARTITION BY client) AS total,
  SUM(freq) / SUM(SUM(freq)) OVER (PARTITION BY client) AS pct
FROM
  calc_unit_counts
GROUP BY
  client,
  num
ORDER BY
  pct DESC
108 changes: 108 additions & 0 deletions sql/2022/css/calc_operators.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#standardSQL
# For one parsed stylesheet, returns how often each operator character
# (-, +, /, *) appears inside calc() arguments:
#   name - the operator character
#   freq - number of occurrences in the stylesheet
# Only declarations whose value contains "calc(" and does not contain "var(--"
# are inspected. Returns an empty array when the stylesheet cannot be processed.
#
# Only this one tally is built; the query uses nothing else, and the UDF runs
# on inputs large enough that the caller already caps stylesheet size.
CREATE TEMPORARY FUNCTION getCalcOperators(css STRING)
RETURNS ARRAY<STRUCT<name STRING, freq INT64>>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  function compute(ast) {
    // Maps operator character to its number of occurrences.
    let operators = {};

    walkDeclarations(ast, ({value}) => {
      for (let calc of extractFunctionCalls(value, {names: "calc"})) {
        // Treat nested calc() as plain parentheses.
        let args = calc.args.replace(/calc\\(/g, "(");

        // Every -, +, / or * character in the arguments counts as an operator.
        let ops = args.match(/[-+\\/*]/g) || [];
        ops.forEach(e => incrementByKey(operators, e));
      }
    }, {
      values: /calc\\(/,
      not: {
        values: /var\\(--/
      }
    });

    return sortObject(operators);
  }

  var ast = JSON.parse(css);
  var operators = compute(ast);
  return Object.entries(operators).map(([name, freq]) => ({name, freq}));
} catch (e) {
  return [];
}
''';

# Popularity of each calc() operator: number of pages using it and its share
# of all calc() operators, per client.
WITH totals AS (
  # Number of pages in the 2022-07-01 crawl, per client.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_pages
  FROM
    `httparchive.summary_pages.2022_07_01_*`
  GROUP BY
    client
),

calc_operator_counts AS (
  # One row per (stylesheet, operator).
  SELECT
    client,
    page,
    op.name AS operator,
    op.freq
  FROM
    `httparchive.almanac.parsed_css`
  CROSS JOIN
    UNNEST(getCalcOperators(css)) AS op
  WHERE
    date = '2022-07-01' AND
    # Limit the size of the CSS to avoid OOM crashes.
    LENGTH(css) < 0.1 * 1024 * 1024
)

SELECT
  client,
  operator,
  COUNT(DISTINCT page) AS pages,
  ANY_VALUE(total_pages) AS total_pages,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
  SUM(freq) AS freq,
  SUM(SUM(freq)) OVER (PARTITION BY client) AS total,
  SUM(freq) / SUM(SUM(freq)) OVER (PARTITION BY client) AS pct
FROM
  calc_operator_counts
INNER JOIN
  totals
USING
  (client)
GROUP BY
  client,
  operator
ORDER BY
  pct DESC
Loading

0 comments on commit 744659f

Please sign in to comment.