SQL script to perform linear regression analysis on the COVIDcast data
Save this script as "analysis-queries.sql"
create or replace view covidcast_fb_survey_results_v as
select
survey_date,
state,
mask_wearing_pct,
cmnty_symptoms_pct as symptoms_pct
from covidcast_fb_survey_results;
\o analysis-results/analysis-queries.txt
\t on
select 'Symptoms by state for survey date = 2020-10-21.';
\t off
select
round(mask_wearing_pct) as "% wearing mask",
round(symptoms_pct) as "% with symptoms",
state
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd')
order by 1;
\t on
select 'Symptoms by state, overall average.';
\t off
select
round(avg(mask_wearing_pct)) as "% wearing mask",
round(avg(symptoms_pct)) as "% with symptoms",
state
from covidcast_fb_survey_results_v
group by state
order by 1;
\t on
select 'Daily regression analysis report.';
\t off
with a as (
select
survey_date,
avg (mask_wearing_pct) as mask_wearing_pct,
avg (symptoms_pct) as symptoms_pct,
regr_r2 (symptoms_pct, mask_wearing_pct) as r2,
regr_slope (symptoms_pct, mask_wearing_pct) as s,
regr_intercept(symptoms_pct, mask_wearing_pct) as i
from covidcast_fb_survey_results_v
group by survey_date)
select
to_char(survey_date, 'mm/dd') as survey_date,
to_char(mask_wearing_pct, '90') as mask_wearing_pct,
to_char(symptoms_pct, '90') as symptoms_pct,
to_char(r2, '0.99') as r2,
to_char(s, '90.9') as s,
to_char(i, '990.9') as i
from a
order by survey_date;
\t on
select 'Regression analysis report for survey date = 2020-10-21.';
\t off
with a as (
select
max(survey_date) as survey_date,
regr_slope (symptoms_pct, mask_wearing_pct) as s,
regr_intercept(symptoms_pct, mask_wearing_pct) as i
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd'))
select
to_char(survey_date, 'mm/dd') as survey_date,
to_char(s, '90.9') as s,
to_char(i, '990.9') as i
from a;
with a as (
select regr_r2 (symptoms_pct, mask_wearing_pct) as r2
from covidcast_fb_survey_results_v
group by survey_date)
select
to_char(avg(r2), '0.99') as "avg(R-squared)"
from a;
\o
\o analysis-results/2020-10-21-mask-symptoms.csv
\t on
select
round(mask_wearing_pct)::text||','||round(symptoms_pct)::text
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd')
order by 1;
\t off
\o