Example Postgres Load

You are connected to database "sahsuland" as user "rif40" on host "localhost" at port "5432".
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT p.proname
                  FROM pg_proc p, pg_namespace n
                 WHERE p.proname  = 'rif40_startup'
                   AND n.nspname  = 'rif40_sql_pkg'
                   AND p.proowner = (SELECT oid FROM pg_roles WHERE rolname = 'rif40')
                   AND n.oid      = p.pronamespace;
--
        c1_rec RECORD;
        sql_stmt VARCHAR;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.proname = 'rif40_startup' THEN
                PERFORM rif40_sql_pkg.rif40_startup();
        ELSE
                RAISE INFO 'RIF startup: not a RIF database';
        END IF;
--
-- Set a default path, schema to user
--
        IF current_user = 'rif40' THEN
                sql_stmt:='SET search_path TO rif40, public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions';
        ELSE
                sql_stmt:='SET search_path TO '||USER||',rif40, public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions';
        END IF;
        RAISE INFO 'SQL> %;', sql_stmt::VARCHAR;
        EXECUTE sql_stmt;
END;
$$;
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  rif40_log_setup() DEFAULTED send DEBUG to INFO: off; debug function list: []
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.01s  rif40_startup(): search_path not set for: rif40
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.01s  rif40_startup(): SQL> DROP FUNCTION IF EXISTS rif40.rif40_run_study(INTEGER, INTEGER);
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: NOTICE:  function rif40.rif40_run_study(pg_catalog.int4,pg_catalog.int4) does not exist, skipping
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.06s  rif40_startup(): Created temporary table: g_rif40_study_areas
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.08s  rif40_startup(): Created temporary table: g_rif40_comparison_areas
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.21s  rif40_startup(): PostGIS extension V2.3.5 (POSTGIS="2.3.5 r16110" GEOS="3.6.2-CAPI-1.10.2 4d2925d" PROJ="Rel. 4.9.3, 15 August 2016" GDAL="GDAL 2.2.2, released 2017/09/15" LIBXML="2.7.8" LIBJSON="0.12" RASTER)
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.21s  rif40_startup(): FDW functionality disabled - FDWServerName, FDWServerType, FDWDBServer RIF parameters not set.
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.21s  rif40_startup(): V$Revision: 1.11 $ DB version $Revision: 1.11 $ matches
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.21s  rif40_startup(): V$Revision: 1.11 $ rif40_geographies, rif40_tables, rif40_health_study_themes exist for user: rif40
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.21s  rif40_startup(): search_path: public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions, reset: rif40, public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  +00000.21s  rif40_startup(): Deleted 0, created 2 tables/views/foreign data wrapper tables
psql:C:/Program Files/PostgreSQL/9.6/etc/psqlrc:48: INFO:  SQL> SET search_path TO rif40, public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions;
DO
Pager usage is off.
\set ON_ERROR_STOP ON
\timing
Timing is on.

\i pg_rif40_load_seer_cancer.sql
-- ************************************************************************
--
-- Description:
--
-- Rapid Enquiry Facility (RIF) - Load SEER numerator data into RIF; Does not reformat into RIF4.0 format (see load_seer.sql)
--                                                                * Requires the "seer_user" role
--                                Postgres script
--
-- Copyright:
--
-- The Rapid Inquiry Facility (RIF) is an automated tool devised by SAHSU
-- that rapidly addresses epidemiological and public health questions using
-- routinely collected health and population data and generates standardised
-- rates and relative risks for any given health outcome, for specified age
-- and year ranges, for any given geographical area.
--
-- Copyright 2014 Imperial College London, developed by the Small Area
-- Health Statistics Unit. The work of the Small Area Health Statistics Unit
-- is funded by the Public Health England as part of the MRC-PHE Centre for
-- Environment and Health. Funding for this project has also been received
-- from the Centers for Disease Control and Prevention.
--
-- This file is part of the Rapid Inquiry Facility (RIF) project.
-- RIF is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Lesser General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- RIF is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public License
-- along with RIF. If not, see <http://www.gnu.org/licenses/>; or write
-- to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-- Boston, MA 02110-1301 USA
--
-- Author:
--
-- Peter Hambly, SAHSU
--
-- Postgres RIF40 specific parameters
--
-- Usage: psql -U rif40 -w -e -f pg_rif40_load_seer_cancer.sql
-- Connect flags if required: -d <Postgres database name> -h <host> -p <port>
--
-- Requires RIF USA county level geography to be loaded: rif_pg_USA_2014.sql
--

\pset pager off
Pager usage is off.
\set ECHO all
\set ON_ERROR_STOP ON
\timing
Timing is off.

--
-- Start transaction
--
BEGIN TRANSACTION;
BEGIN

--
-- Setup RIF user
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT p.proname
                  FROM pg_proc p, pg_namespace n
                 WHERE p.proname  = 'rif40_startup'
                   AND n.nspname  = 'rif40_sql_pkg'
                   AND p.proowner = (SELECT oid FROM pg_roles WHERE rolname = 'rif40')
                   AND n.oid      = p.pronamespace;
--
        c1_rec RECORD;
        sql_stmt VARCHAR;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.proname = 'rif40_startup' THEN
                PERFORM rif40_sql_pkg.rif40_startup();
        ELSE
                RAISE EXCEPTION 'RIF startup(SEER loader): not a RIF database';
        END IF;
--
-- Set a default path and schema for user
--
        IF current_user = 'rif40' THEN
                sql_stmt:='SET SESSION search_path TO rif_data /* default schema */, rif40, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions';
        ELSE
                RAISE EXCEPTION 'RIF startup(SEER loader): RIF user: % is not rif40', current_user;
        END IF;
        RAISE INFO 'SQL> %;', sql_stmt::VARCHAR;
        EXECUTE sql_stmt;
END;
$$;
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  rif40_log_setup() send DEBUG to INFO: off; debug function list: []
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.03s  rif40_startup(): search_path not set for: rif40
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.03s  rif40_startup(): SQL> DROP FUNCTION IF EXISTS rif40.rif40_run_study(INTEGER, INTEGER);
psql:pg_rif40_load_seer_cancer.sql:97: NOTICE:  function rif40.rif40_run_study(pg_catalog.int4,pg_catalog.int4) does not exist, skipping
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.04s  rif40_startup(): Temporary table: g_rif40_comparison_areas exists
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.05s  rif40_startup(): PostGIS extension V2.3.5 (POSTGIS="2.3.5 r16110" GEOS="3.6.2-CAPI-1.10.2 4d2925d" PROJ="Rel. 4.9.3, 15 August 2016" GDAL="GDAL 2.2.2, released 2017/09/15" LIBXML="2.7.8" LIBJSON="0.12" RASTER)
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.05s  rif40_startup(): FDW functionality disabled - FDWServerName, FDWServerType, FDWDBServer RIF parameters not set.
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.05s  rif40_startup(): V$Revision: 1.11 $ DB version $Revision: 1.11 $ matches
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.05s  rif40_startup(): V$Revision: 1.11 $ rif40_geographies, rif40_tables, rif40_health_study_themes exist for user: rif40
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  +00000.05s  rif40_startup(): search_path: public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions, reset: rif40, public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions
psql:pg_rif40_load_seer_cancer.sql:97: INFO:  SQL> SET SESSION search_path TO rif_data /* default schema */, rif40, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions;
DO

--
-- Check if geography is loaded
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT geography
                  FROM rif40_geographies a
                 WHERE a.geography  = 'USA_2014';
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.geography = 'USA_2014' THEN
                RAISE INFO 'Geography: USA_2014 loaded';
        ELSE
                RAISE EXCEPTION 'Geography: USA_2014 is NOT loaded';
        END IF;
END;
$$;
psql:pg_rif40_load_seer_cancer.sql:120: INFO:  Geography: USA_2014 loaded
DO

--
-- Check SEER_USER role is present (needs to be created by an administrator)
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT rolname
                  FROM pg_roles a
                 WHERE a.rolname  = 'seer_user';
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.rolname = 'seer_user' THEN
                RAISE INFO 'SEER_USER: role is present';
        ELSE
                RAISE EXCEPTION 'SEER_USER: role is NOT present';
        END IF;
END;
$$;
psql:pg_rif40_load_seer_cancer.sql:143: INFO:  SEER_USER: role is present
DO

--
-- Remove old SEER data
--
DROP TABLE IF EXISTS rif_data.t_seer_cancer;
psql:pg_rif40_load_seer_cancer.sql:148: NOTICE:  table "t_seer_cancer" does not exist, skipping
DROP TABLE
DROP VIEW IF EXISTS rif_data.seer_cancer;
psql:pg_rif40_load_seer_cancer.sql:149: NOTICE:  view "seer_cancer" does not exist, skipping
DROP VIEW

DELETE FROM rif40.rif40_table_outcomes
 WHERE numer_tab='SEER_CANCER';
DELETE 0

DELETE FROM rif40.rif40_tables
 WHERE table_name='SEER_CANCER';
DELETE 0

 --
-- Create SEER_CANCER numerator table
--
CREATE TABLE rif_data.t_seer_cancer
(
  year integer NOT NULL, -- Year
  cb_2014_us_nation_5m text, -- United States to county level including territories
  cb_2014_us_state_500k text NOT NULL, -- State geographic Names Information System (GNIS) code
  cb_2014_us_county_500k text NOT NULL, -- County geographic Names Information System (GNIS) code. Unjoined county FIPS codes to "UNKNOWN: " + county FIPS code; e.g. the 900 series to represent county/independent city combinations in Virginia.
  age_sex_group integer NOT NULL, -- RIF age_sex_group 1 (21 bands)
  icdot10v text, -- ICD 10 site code - recoded from ICD-O-2 to 10
  pubcsnum integer NOT NULL, -- Patient ID
  seq_num integer NOT NULL, -- Sequence number
  histo3v text, -- Histologic Type ICD-O-3
  beho3v text, -- Behavior code ICD-O-3
  rac_reca integer, -- Race recode A (WHITE, BLACK, OTHER)
  rac_recy integer, -- Race recode Y (W, B, AI, API)
  origrecb integer, -- Origin Recode NHIA (HISPANIC, NON-HISP)
  codpub text, -- Cause of death to SEER site recode (see: https://seer.cancer.gov/codrecode/1969+_d09172004/index.html)
  reg integer, -- SEER registry (minus 1500 so same as population file)
  CONSTRAINT seer_cancer_pk PRIMARY KEY (pubcsnum, seq_num),
  CONSTRAINT seer_cancer_asg_ck CHECK (
                        (age_sex_group >= 100 AND age_sex_group <= 121) OR
                        (age_sex_group >= 200 AND age_sex_group <= 221) OR
                        (age_sex_group IN (199, 299))
                )
        );
CREATE TABLE

--
-- Load
--
\copy rif_data.t_seer_cancer FROM 'seer_cancer.csv' WITH CSV HEADER;
COPY 9176963

--
-- Check rowcount
--
SELECT COUNT(*) AS total FROM t_seer_cancer;
  total
---------
 9176963
(1 row)

DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT COUNT(*) AS total
                  FROM t_seer_cancer;
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.total = 9176963 THEN
                RAISE INFO 'Table: t_seer_cancer has % rows', c1_rec.total;
        ELSE
                RAISE EXCEPTION 'Table: t_seer_cancer has % rows; expecting 9176963', c1_rec.total;
        END IF;
END;
$$;
psql:pg_rif40_load_seer_cancer.sql:211: INFO:  Table: t_seer_cancer has 9176963 rows
DO

--
-- Indexes
--
CREATE INDEX seer_cancer_age_sex_group
  ON rif_data.t_seer_cancer
  (age_sex_group);
CREATE INDEX

CREATE INDEX seer_cancer_cb_2014_us_county_500k
  ON rif_data.t_seer_cancer
  (cb_2014_us_county_500k);
CREATE INDEX

CREATE INDEX seer_cancer_cb_2014_us_nation_5m
  ON rif_data.t_seer_cancer
  (cb_2014_us_nation_5m);
CREATE INDEX

CREATE INDEX seer_cancer_cb_2014_us_state_500k
  ON rif_data.t_seer_cancer
  (cb_2014_us_state_500k);
CREATE INDEX

CREATE INDEX seer_cancer_icdot10v
  ON rif_data.t_seer_cancer
  (icdot10v);
CREATE INDEX

CREATE INDEX seer_cancer_reg
  ON rif_data.t_seer_cancer
  (reg);
CREATE INDEX

CREATE INDEX seer_cancer_year
  ON rif_data.t_seer_cancer
  (year);
CREATE INDEX

--
-- Create a test view
--
CREATE OR REPLACE VIEW rif_data.seer_cancer AS
SELECT * FROM rif_data.t_seer_cancer;
CREATE VIEW

COMMENT ON TABLE rif_data.t_seer_cancer
  IS 'SEER Cancer data 1973-2013. 9 States in total';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.year IS 'Year';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.cb_2014_us_nation_5m IS 'United States to county level including territories';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.cb_2014_us_state_500k IS 'State geographic Names Information System (GNIS) code';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.cb_2014_us_county_500k IS 'County geographic Names Information System (GNIS) code. Unjoined county FIPS codes to "UNKNOWN: " + county FIPS code; e.g. the 900 series to represent county/independent city combinations in Virginia.';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.age_sex_group IS 'RIF age_sex_group 1 (21 bands)';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.icdot10v IS 'ICD 10 site code - recoded from ICD-O-2 to 10';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.pubcsnum IS 'Patient ID';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.seq_num IS 'Sequence number';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.histo3v IS 'Histologic Type ICD-O-3';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.beho3v IS 'Behavior code ICD-O-3';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.rac_reca IS 'Race recode A (WHITE, BLACK, OTHER)';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.rac_recy IS 'Race recode Y (W, B, AI, API)';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.origrecb IS 'Origin Recode NHIA (HISPANIC, NON-HISP)';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.codpub IS 'Cause of death to SEER site recode (see: https://seer.cancer.gov/codrecode/1969+_d09172004/index.html)';
COMMENT
COMMENT ON COLUMN rif_data.t_seer_cancer.reg IS 'SEER registry (minus 1500 so same as population file)';
COMMENT

COMMENT ON VIEW rif_data.seer_cancer
  IS 'SEER Cancer data 1973-2013 view test. 9 States in total';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.year IS 'Year';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.cb_2014_us_nation_5m IS 'United States to county level including territories';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.cb_2014_us_state_500k IS 'State geographic Names Information System (GNIS) code';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.cb_2014_us_county_500k IS 'County geographic Names Information System (GNIS) code. Unjoined county FIPS codes to "UNKNOWN: " + county FIPS code; e.g. the 900 series to represent county/independent city combinations in Virginia.';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.age_sex_group IS 'RIF age_sex_group 1 (21 bands)';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.icdot10v IS 'ICD 10 site code - recoded from ICD-O-2 to 10';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.pubcsnum IS 'Patient ID';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.seq_num IS 'Sequence number';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.histo3v IS 'Histologic Type ICD-O-3';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.beho3v IS 'Behavior code ICD-O-3';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.rac_reca IS 'Race recode A (WHITE, BLACK, OTHER)';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.rac_recy IS 'Race recode Y (W, B, AI, API)';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.origrecb IS 'Origin Recode NHIA (HISPANIC, NON-HISP)';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.codpub IS 'Cause of death to SEER site recode (see: https://seer.cancer.gov/codrecode/1969+_d09172004/index.html)';
COMMENT
COMMENT ON COLUMN rif_data.seer_cancer.reg IS 'SEER registry (minus 1500 so same as population file)';
COMMENT

--
-- Setup as numerator
--
INSERT INTO rif40.rif40_tables (
   theme,
   table_name,
   description,
   year_start,
   year_stop,
   total_field,
   isindirectdenominator,
   isdirectdenominator,
   isnumerator,
   automatic,
   sex_field_name,
   age_group_field_name,
   age_sex_group_field_name,
   age_group_id)
SELECT
   'cancers',                   /* theme */
   'SEER_CANCER',               /* table_name */
   'SEER Cancer data 1973-2013. 9 States in total',                             /* description */
   MIN(year),                   /* year_start */
   MAX(year),                   /* year_stop */
   NULL,                                /* total_field */
   0,                                   /* isindirectdenominator */
   0,                                   /* isdirectdenominator */
   1,                                   /* isnumerator */
   1,                                   /* automatic */
   NULL,                                /* sex_field_name */
   NULL,                                /* age_group_field_name */
   'AGE_SEX_GROUP',             /* age_sex_group_field_name */
   1                                    /* age_group_id */
  FROM rif_data.seer_cancer;
INSERT 0 1

--
-- Setup ICD field (SEER_ICDOT10V)
-- * ICD-O-3 histology (HISTO3V) to follow later
--
INSERT INTO rif40.rif40_outcome_groups(
   outcome_type, outcome_group_name, outcome_group_description, field_name, multiple_field_count)
SELECT
   'ICD' AS outcome_type,
   'SEER_ICDOT10V' AS outcome_group_name,
   'SEER ICDOT10V' AS outcome_group_description,
   'ICDOT10V' AS field_name,
   0 AS multiple_field_count
WHERE NOT EXISTS (SELECT outcome_group_name FROM  rif40.rif40_outcome_groups WHERE outcome_group_name = 'SEER_ICDOT10V');
INSERT 0 1

INSERT INTO rif40.rif40_table_outcomes (
   outcome_group_name,
   numer_tab,
   current_version_start_year)
SELECT
   'SEER_ICDOT10V',
   'SEER_CANCER',
   MIN(year)
FROM rif_data.seer_cancer;
INSERT 0 1

--
-- Grant
-- * The role SEER_USER needs to be created by an administrator
--
GRANT SELECT ON rif_data.seer_cancer TO seer_user;
GRANT

--
-- End transaction (COMMIT)
--
END;
COMMIT

--
-- Eof

\i pg_rif40_load_seer_population.sql
-- ************************************************************************
--
-- Description:
--
-- Rapid Enquiry Facility (RIF) - Load denominator SEER data into RIF; Does not reformat into RIF4.0 format (see load_seer.sql)
--                                                                * Requires the "seer_user" role
--                                Postgres script
--
-- Copyright:
--
-- The Rapid Inquiry Facility (RIF) is an automated tool devised by SAHSU
-- that rapidly addresses epidemiological and public health questions using
-- routinely collected health and population data and generates standardised
-- rates and relative risks for any given health outcome, for specified age
-- and year ranges, for any given geographical area.
--
-- Copyright 2014 Imperial College London, developed by the Small Area
-- Health Statistics Unit. The work of the Small Area Health Statistics Unit
-- is funded by the Public Health England as part of the MRC-PHE Centre for
-- Environment and Health. Funding for this project has also been received
-- from the Centers for Disease Control and Prevention.
--
-- This file is part of the Rapid Inquiry Facility (RIF) project.
-- RIF is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Lesser General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- RIF is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public License
-- along with RIF. If not, see <http://www.gnu.org/licenses/>; or write
-- to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-- Boston, MA 02110-1301 USA
--
-- Author:
--
-- Peter Hambly, SAHSU
--
-- Postgres RIF40 specific parameters
--
-- Usage: psql -U rif40 -w -e -f pg_rif40_load_seer_population.sql.sql
-- Connect flags if required: -d <Postgres database name> -h <host> -p <port>
--
-- Requires RIF USA county level geography to be loaded: rif_pg_USA_2014.sql
--

\pset pager off
Pager usage is off.
\set ECHO all
\set ON_ERROR_STOP ON
\timing
Timing is on.

--
-- Start transaction
--
BEGIN TRANSACTION;
BEGIN
Time: 5.242 ms

--
-- Setup RIF user
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT p.proname
                  FROM pg_proc p, pg_namespace n
                 WHERE p.proname  = 'rif40_startup'
                   AND n.nspname  = 'rif40_sql_pkg'
                   AND p.proowner = (SELECT oid FROM pg_roles WHERE rolname = 'rif40')
                   AND n.oid      = p.pronamespace;
--
        c1_rec RECORD;
        sql_stmt VARCHAR;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.proname = 'rif40_startup' THEN
                PERFORM rif40_sql_pkg.rif40_startup();
        ELSE
                RAISE EXCEPTION 'RIF startup(SEER loader): not a RIF database';
        END IF;
--
-- Set a default path and schema for user
--
        IF current_user = 'rif40' THEN
                sql_stmt:='SET SESSION search_path TO rif_data /* default schema */, rif40, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions';
        ELSE
                RAISE EXCEPTION 'RIF startup(SEER loader): RIF user: % is not rif40', current_user;
        END IF;
        RAISE INFO 'SQL> %;', sql_stmt::VARCHAR;
        EXECUTE sql_stmt;
END;
$$;
psql:pg_rif40_load_seer_population.sql:97: INFO:  rif40_log_setup() send DEBUG to INFO: off; debug function list: []
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.03s  rif40_startup(): search_path not set for: rif40
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.03s  rif40_startup(): SQL> DROP FUNCTION IF EXISTS rif40.rif40_run_study(INTEGER, INTEGER);
psql:pg_rif40_load_seer_population.sql:97: NOTICE:  function rif40.rif40_run_study(pg_catalog.int4,pg_catalog.int4) does not exist, skipping
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.05s  rif40_startup(): Temporary table: g_rif40_comparison_areas exists
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.06s  rif40_startup(): PostGIS extension V2.3.5 (POSTGIS="2.3.5 r16110" GEOS="3.6.2-CAPI-1.10.2 4d2925d" PROJ="Rel. 4.9.3, 15 August 2016" GDAL="GDAL 2.2.2, released 2017/09/15" LIBXML="2.7.8" LIBJSON="0.12" RASTER)
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.06s  rif40_startup(): FDW functionality disabled - FDWServerName, FDWServerType, FDWDBServer RIF parameters not set.
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.06s  rif40_startup(): V$Revision: 1.11 $ DB version $Revision: 1.11 $ matches
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.06s  rif40_startup(): V$Revision: 1.11 $ rif40_geographies, rif40_tables, rif40_health_study_themes exist for user: rif40
psql:pg_rif40_load_seer_population.sql:97: INFO:  +00000.06s  rif40_startup(): search_path: rif_data, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions, reset: rif40, public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions
psql:pg_rif40_load_seer_population.sql:97: INFO:  SQL> SET SESSION search_path TO rif_data /* default schema */, rif40, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions;
DO
Time: 52.219 ms

--
-- Check if geography is loaded
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT geography
                  FROM rif40_geographies a
                 WHERE a.geography  = 'USA_2014';
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.geography = 'USA_2014' THEN
                RAISE INFO 'Geography: USA_2014 loaded';
        ELSE
                RAISE EXCEPTION 'Geography: USA_2014 is NOT loaded';
        END IF;
END;
$$;
psql:pg_rif40_load_seer_population.sql:120: INFO:  Geography: USA_2014 loaded
DO
Time: 2.700 ms

--
-- Check SEER_USER role is present (needs to be created by an administrator)
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT rolname
                  FROM pg_roles a
                 WHERE a.rolname  = 'seer_user';
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.rolname = 'seer_user' THEN
                RAISE INFO 'SEER_USER: role is present';
        ELSE
                RAISE EXCEPTION 'SEER_USER: role is NOT present';
        END IF;
END;
$$;
psql:pg_rif40_load_seer_population.sql:143: INFO:  SEER_USER: role is present
DO
Time: 4.935 ms

--
-- Remove old SEER data
--
DROP TABLE IF EXISTS rif_data.seer_population;
psql:pg_rif40_load_seer_population.sql:148: NOTICE:  table "seer_population" does not exist, skipping
DROP TABLE
Time: 1.768 ms

DELETE FROM rif40.rif40_tables
WHERE table_name='SEER_POPULATION';
DELETE 0
Time: 0.372 ms

--
-- Create SEER_POPULATION numerator table
--
CREATE TABLE rif_data.seer_population
(
  year integer NOT NULL, -- Year
  cb_2014_us_nation_5m text NOT NULL, -- United States to county level including territories
  cb_2014_us_state_500k text NOT NULL, -- State geographic Names Information System (GNIS) code
  cb_2014_us_county_500k text NOT NULL, -- County geographic Names Information System (GNIS) code. Unjoined county FIPS codes to "UNKNOWN: " + county FIPS code; e.g. the 900 series to represent county/independent city combinations in Virginia.
  age_sex_group integer NOT NULL, -- RIF age_sex_group 1 (21 bands)
  population numeric, -- Population
  CONSTRAINT seer_population_pk PRIMARY KEY (year, cb_2014_us_nation_5m, cb_2014_us_state_500k, cb_2014_us_county_500k, age_sex_group),
  CONSTRAINT seer_population_asg_ck CHECK (age_sex_group >= 100 AND age_sex_group <= 121 OR age_sex_group >= 200 AND age_sex_group <= 221)
);
CREATE TABLE
Time: 16.006 ms
COMMENT ON TABLE rif_data.seer_population
  IS 'SEER Population 1972-2013. Georgia starts in 1975, Washington in 1974. 9 States in total';
COMMENT
Time: 0.511 ms
COMMENT ON COLUMN rif_data.seer_population.year IS 'Year';
COMMENT
Time: 0.393 ms
COMMENT ON COLUMN rif_data.seer_population.cb_2014_us_nation_5m IS 'United States to county level including territories';
COMMENT
Time: 0.392 ms
COMMENT ON COLUMN rif_data.seer_population.cb_2014_us_state_500k IS 'State geographic Names Information System (GNIS) code';
COMMENT
Time: 0.428 ms
COMMENT ON COLUMN rif_data.seer_population.cb_2014_us_county_500k IS 'County geographic Names Information System (GNIS) code. Unjoined county FIPS codes to "UNKNOWN: " + county FIPS code; e.g. the 900 series to represent county/independent city combinations in Virginia.';
COMMENT
Time: 0.346 ms
COMMENT ON COLUMN rif_data.seer_population.age_sex_group IS 'RIF age_sex_group 1 (21 bands)';
COMMENT
Time: 0.372 ms
COMMENT ON COLUMN rif_data.seer_population.population IS 'Population';
COMMENT
Time: 0.251 ms

--
-- Load
--
\copy rif_data.seer_population FROM 'seer_population.csv' WITH CSV HEADER;
COPY 614360
Time: 3435.280 ms

--
-- Check rowcount
--
SELECT COUNT(*) AS total FROM seer_population;
 total
--------
 614360
(1 row)

Time: 66.457 ms
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT COUNT(*) AS total
                  FROM seer_population;
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.total = 614360 THEN
                RAISE INFO 'Table: seer_population has % rows', c1_rec.total;
        ELSE
                RAISE EXCEPTION 'Table: seer_population has % rows; expecting 614360', c1_rec.total;
        END IF;
END;
$$;
psql:pg_rif40_load_seer_population.sql:202: INFO:  Table: seer_population has 614360 rows
DO
Time: 45.965 ms

--
-- Convert to index organised table
--
CLUSTER rif_data.seer_population USING seer_population_pk;
CLUSTER
Time: 1138.351 ms

--
-- Indexes
--
CREATE INDEX seer_population_age_sex_group
  ON rif_data.seer_population
  (age_sex_group);
CREATE INDEX
Time: 459.565 ms

CREATE INDEX seer_population_cb_2014_us_county_500k
  ON rif_data.seer_population
  (cb_2014_us_county_500k);
CREATE INDEX
Time: 1440.758 ms

CREATE INDEX seer_population_cb_2014_us_nation_5m
  ON rif_data.seer_population
  (cb_2014_us_nation_5m);
CREATE INDEX
Time: 195.549 ms

CREATE INDEX seer_population_cb_2014_us_state_500k
  ON rif_data.seer_population
  (cb_2014_us_state_500k);
CREATE INDEX
Time: 851.007 ms

CREATE INDEX seer_population_year
  ON rif_data.seer_population
  (year);
CREATE INDEX
Time: 185.209 ms

--
-- Setup as denominator
--
INSERT INTO rif40.rif40_tables (
   theme,
   table_name,
   description,
   year_start,
   year_stop,
   total_field,
   isindirectdenominator,
   isdirectdenominator,
   isnumerator,
   automatic,
   sex_field_name,
   age_group_field_name,
   age_sex_group_field_name,
   age_group_id)
SELECT
   'cancers',           /* theme */
   'SEER_POPULATION',   /* table_name */
   'SEER Population 1972-2013. Georgia starts in 1975, Washington in 1974. 9 States in total',  /* description */
   MIN(YEAR),           /* year_start */
   MAX(YEAR),           /* year_stop */
   'POPULATION',                        /* total_field */
   1,                           /* isindirectdenominator */
   0,                           /* isdirectdenominator */
   0,                           /* isnumerator */
   1,                           /* automatic */
   NULL,                        /* sex_field_name */
   NULL,                        /* age_group_field_name */
   'AGE_SEX_GROUP',     /* age_sex_group_field_name */
   1                            /* age_group_id */
FROM rif_data.seer_population;
INSERT 0 1
Time: 37.323 ms

--
-- Grant
-- * The role SEER_USER needs to be created by an administrator
--
GRANT SELECT ON rif_data.seer_population TO seer_user;
GRANT
Time: 0.417 ms


--
-- End transaction (COMMIT)
--
END;
COMMIT
Time: 16.087 ms

--
-- Eof
\i pg_rif40_load_seer_covariates.sql
-- ************************************************************************
--
-- Description:
--
-- Rapid Enquiry Facility (RIF) - Load covariate SEER data into RIF; Does not reformat into RIF4.0 format (see load_seer.sql)
--                                                                * Requires the "seer_user" role
--                                Postgres script
--
-- Copyright:
--
-- The Rapid Inquiry Facility (RIF) is an automated tool devised by SAHSU
-- that rapidly addresses epidemiological and public health questions using
-- routinely collected health and population data and generates standardised
-- rates and relative risks for any given health outcome, for specified age
-- and year ranges, for any given geographical area.
--
-- Copyright 2014 Imperial College London, developed by the Small Area
-- Health Statistics Unit. The work of the Small Area Health Statistics Unit
-- is funded by the Public Health England as part of the MRC-PHE Centre for
-- Environment and Health. Funding for this project has also been received
-- from the Centers for Disease Control and Prevention.
--
-- This file is part of the Rapid Inquiry Facility (RIF) project.
-- RIF is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Lesser General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- RIF is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public License
-- along with RIF. If not, see <http://www.gnu.org/licenses/>; or write
-- to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-- Boston, MA 02110-1301 USA
--
-- Author:
--
-- Peter Hambly, SAHSU
--
-- Postgres RIF40 specific parameters
--
-- Usage: psql -U rif40 -w -e -f pg_rif40_load_seer_covariates.sql.sql.sql
-- Connect flags if required: -d <Postgres database name> -h <host> -p <port>
--
-- Requires RIF USA county level geography to be loaded: rif_pg_USA_2014.sql
--

\pset pager off
Pager usage is off.
\set ECHO all
\set ON_ERROR_STOP ON
\timing
Timing is off.

--
-- Start transaction
--
BEGIN TRANSACTION;
BEGIN

--
-- Setup RIF user
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT p.proname
                  FROM pg_proc p, pg_namespace n
                 WHERE p.proname  = 'rif40_startup'
                   AND n.nspname  = 'rif40_sql_pkg'
                   AND p.proowner = (SELECT oid FROM pg_roles WHERE rolname = 'rif40')
                   AND n.oid      = p.pronamespace;
--
        c1_rec RECORD;
        sql_stmt VARCHAR;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.proname = 'rif40_startup' THEN
                PERFORM rif40_sql_pkg.rif40_startup();
        ELSE
                RAISE EXCEPTION 'RIF startup(SEER loader): not a RIF database';
        END IF;
--
-- Set a default path and schema for user
--
        IF current_user = 'rif40' THEN
                sql_stmt:='SET SESSION search_path TO rif_data /* default schema */, rif40, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions';
        ELSE
                RAISE EXCEPTION 'RIF startup(SEER loader): RIF user: % is not rif40', current_user;
        END IF;
        RAISE INFO 'SQL> %;', sql_stmt::VARCHAR;
        EXECUTE sql_stmt;
END;
$$;
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  rif40_log_setup() send DEBUG to INFO: off; debug function list: []
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.02s  rif40_startup(): search_path not set for: rif40
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.02s  rif40_startup(): SQL> DROP FUNCTION IF EXISTS rif40.rif40_run_study(INTEGER, INTEGER);
psql:pg_rif40_load_seer_covariates.sql:97: NOTICE:  function rif40.rif40_run_study(pg_catalog.int4,pg_catalog.int4) does not exist, skipping
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.03s  rif40_startup(): Temporary table: g_rif40_comparison_areas exists
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.04s  rif40_startup(): PostGIS extension V2.3.5 (POSTGIS="2.3.5 r16110" GEOS="3.6.2-CAPI-1.10.2 4d2925d" PROJ="Rel. 4.9.3, 15 August 2016" GDAL="GDAL 2.2.2, released 2017/09/15" LIBXML="2.7.8" LIBJSON="0.12" RASTER)
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.04s  rif40_startup(): FDW functionality disabled - FDWServerName, FDWServerType, FDWDBServer RIF parameters not set.
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.04s  rif40_startup(): V$Revision: 1.11 $ DB version $Revision: 1.11 $ matches
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.04s  rif40_startup(): V$Revision: 1.11 $ rif40_geographies, rif40_tables, rif40_health_study_themes exist for user: rif40
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  +00000.04s  rif40_startup(): search_path: rif_data, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions, reset: rif40, public, topology, gis, pop, rif_data, data_load, rif40_sql_pkg, rif_studies, rif40_partitions
psql:pg_rif40_load_seer_covariates.sql:97: INFO:  SQL> SET SESSION search_path TO rif_data /* default schema */, rif40, public, topology, gis, pop, data_load, rif40_sql_pkg, rif_studies, rif40_partitions;
DO

--
-- Check if geography is loaded
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT geography
                  FROM rif40_geographies a
                 WHERE a.geography  = 'USA_2014';
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.geography = 'USA_2014' THEN
                RAISE INFO 'Geography: USA_2014 loaded';
        ELSE
                RAISE EXCEPTION 'Geography: USA_2014 is NOT loaded';
        END IF;
END;
$$;
psql:pg_rif40_load_seer_covariates.sql:120: INFO:  Geography: USA_2014 loaded
DO

--
-- Check SEER_USER role is present (needs to be created by an administrator)
--
DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT rolname
                  FROM pg_roles a
                 WHERE a.rolname  = 'seer_user';
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.rolname = 'seer_user' THEN
                RAISE INFO 'SEER_USER: role is present';
        ELSE
                RAISE EXCEPTION 'SEER_USER: role is NOT present';
        END IF;
END;
$$;
psql:pg_rif40_load_seer_covariates.sql:143: INFO:  SEER_USER: role is present
DO
--
-- Remove old SEER data
--
DROP TABLE IF EXISTS rif_data.cov_cb_2014_us_county_500k;
DROP TABLE
DROP TABLE IF EXISTS rif_data.cov_cb_2014_us_state_500k;
DROP TABLE

DELETE FROM rif40_covariates
 WHERE geography = 'USA_2014';
DELETE 0


--
-- Covariates tables
--

-- Table: rif_data.cov_cb_2014_us_county_500k
CREATE TABLE rif_data.cov_cb_2014_us_county_500k
(
  year                                                                                          integer NOT NULL, -- Year
  cb_2014_us_county_500k                                                        character varying(30) NOT NULL, -- Geolevel name
  areaname                                                                                      character varying(200),
  total_poverty_all_ages                                                        INTEGER,
  pct_poverty_all_ages                                                          NUMERIC,
  pct_poverty_0_17                                                                      NUMERIC,
  pct_poverty_related_5_17                                                      NUMERIC,
  median_household_income                                                       NUMERIC,
  median_hh_income_quin                                                         INTEGER,
  med_pct_not_in_pov_quin                                                       INTEGER,
  med_pct_not_in_pov_0_17_quin                                          INTEGER,
  med_pct_not_in_pov_5_17r_quin                                         INTEGER,
  pct_white_quintile                                                            INTEGER,
  pct_black_quintile                                                            INTEGER,
  CONSTRAINT cov_cb_2014_us_county_500k_pkey PRIMARY KEY (year, cb_2014_us_county_500k)
);
CREATE TABLE

\copy cov_cb_2014_us_county_500k FROM 'cov_cb_2014_us_county_500k.csv' WITH CSV HEADER;
COPY 132553

--
-- Check rowcount
--
SELECT COUNT(*) AS total FROM cov_cb_2014_us_county_500k;
 total
--------
 132553
(1 row)

DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT COUNT(*) AS total
                  FROM cov_cb_2014_us_county_500k;
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.total = 132553 THEN
                RAISE INFO 'Table: cov_cb_2014_us_county_500k has % rows', c1_rec.total;
        ELSE
                RAISE EXCEPTION 'Table: cov_cb_2014_us_county_500k has % rows; expecting 132553', c1_rec.total;
        END IF;
END;
$$;
psql:pg_rif40_load_seer_covariates.sql:201: INFO:  Table: cov_cb_2014_us_county_500k has 132553 rows
DO

--
-- Convert to index organised table
--
CLUSTER rif_data.cov_cb_2014_us_county_500k USING cov_cb_2014_us_county_500k_pkey;
CLUSTER

COMMENT ON TABLE rif_data.cov_cb_2014_us_county_500k
  IS 'Example covariate table for: The County at a scale of 1:500,000';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.year IS 'Year';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.cb_2014_us_county_500k IS 'County FIPS code (geolevel id)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.areaname IS 'Area (county) name';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.total_poverty_all_ages IS 'Estimate of people of all ages in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.pct_poverty_all_ages IS 'Estimate percent of people of all ages in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.pct_poverty_0_17 IS 'Estimated percent of people age 0-17 in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.pct_poverty_related_5_17 IS 'Estimated percent of related children age 5-17 in families in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.median_household_income IS 'Estimate of median household income';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.median_hh_income_quin IS 'Quintile: estimate of median household income (1=most deprived, 5=least)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.med_pct_not_in_pov_quin IS 'Quintile: estimate percent of people of all ages NOT in poverty (1=most deprived, 5=least)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.med_pct_not_in_pov_0_17_quin IS 'Quintile: estimated percent of people age 0-17 NOT in poverty (1=most deprived, 5=least)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.med_pct_not_in_pov_5_17r_quin IS 'Quintile: estimated percent of related children age 5-17 in families NOT in poverty (1=most deprived, 5=least)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.pct_white_quintile IS '% White quintile (1=least white, 5=most)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_county_500k.pct_black_quintile IS '% Black quintile (1=least black, 5=most)';
COMMENT

--
-- Grant
-- * The role SEER_USER needs to be created by an administrator
--
GRANT SELECT ON rif_data.cov_cb_2014_us_county_500k TO seer_user;
GRANT

-- Table: rif_data.cov_cb_2014_us_state_500k

CREATE TABLE rif_data.cov_cb_2014_us_state_500k
(
  year integer NOT NULL, -- Year
  cb_2014_us_state_500k character varying(30) NOT NULL, -- Geolevel name
  areaname                                                                                      character varying(200),
  total_poverty_all_ages                                                        INTEGER,
  pct_poverty_all_ages                                                          NUMERIC,
  pct_poverty_0_17                                                                      NUMERIC,
  pct_poverty_related_5_17                                                      NUMERIC,
  median_household_income                                                       NUMERIC,
  median_hh_income_quin                                                         INTEGER,
  med_pct_not_in_pov_quin                                                       INTEGER,
  med_pct_not_in_pov_0_17_quin                                          INTEGER,
  med_pct_not_in_pov_5_17r_quin                                         INTEGER,
  CONSTRAINT cov_cb_2014_us_state_500k_pkey PRIMARY KEY (year, cb_2014_us_state_500k)
);
CREATE TABLE

\copy cov_cb_2014_us_state_500k FROM 'cov_cb_2014_us_state_500k.csv' WITH CSV HEADER;
COPY 2296

SELECT COUNT(*) AS total FROM cov_cb_2014_us_state_500k;
 total
-------
  2296
(1 row)

DO LANGUAGE plpgsql $$
DECLARE
        c1 CURSOR FOR
                SELECT COUNT(*) AS total
                  FROM cov_cb_2014_us_state_500k;
        c1_rec RECORD;
BEGIN
        OPEN c1;
        FETCH c1 INTO c1_rec;
        CLOSE c1;
--
        IF c1_rec.total = 2296 THEN
                RAISE INFO 'Table: cov_cb_2014_us_state_500k has % rows', c1_rec.total;
        ELSE
                RAISE EXCEPTION 'Table: cov_cb_2014_us_state_500k has % rows; expecting 2296', c1_rec.total;
        END IF;
END;
$$;
psql:pg_rif40_load_seer_covariates.sql:270: INFO:  Table: cov_cb_2014_us_state_500k has 2296 rows
DO

--
-- Convert to index organised table
--
CLUSTER rif_data.cov_cb_2014_us_state_500k USING cov_cb_2014_us_state_500k_pkey;
CLUSTER

COMMENT ON TABLE rif_data.cov_cb_2014_us_state_500k
  IS 'Example covariate table for: The State at a scale of 1:500,000';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.year IS 'Year';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.cb_2014_us_state_500k IS 'State geographic Names Information System (GNIS) code';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.total_poverty_all_ages IS 'Estimate of people of all ages in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.pct_poverty_all_ages IS 'Estimate percent of people of all ages in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.pct_poverty_0_17 IS 'Estimated percent of people age 0-17 in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.pct_poverty_related_5_17 IS 'Estimated percent of related children age 5-17 in families in poverty';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.median_household_income IS 'Estimate of median household income';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.median_hh_income_quin IS 'Quintile: estimate of median household income (1=most deprived, 5=least)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.med_pct_not_in_pov_quin IS 'Quintile: estimate percent of people of all ages NOT in poverty (1=most deprived, 5=least)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.med_pct_not_in_pov_0_17_quin IS 'Quintile: estimated percent of people age 0-17 NOT in poverty (1=most deprived, 5=least)';
COMMENT
COMMENT ON COLUMN rif_data.cov_cb_2014_us_state_500k.med_pct_not_in_pov_5_17r_quin IS 'Quintile: estimated percent of related children age 5-17 in families NOT in poverty (1=most deprived, 5=least)';
COMMENT

--
-- Grant
-- * The role SEER_USER needs to be created by an administrator
--
GRANT SELECT ON rif_data.cov_cb_2014_us_state_500k TO seer_user;
GRANT

--
-- RIF40_COVARIATES integration. Continuous variable type (2) not yet supported.
-- * Add ethnicity: % white, black quintilised
--
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_COUNTY_500K',        /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MEDIAN_HH_INCOME_QUIN',         /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(median_hh_income_quin),      /* Minimum value */
       MAX(median_hh_income_quin),      /* Maximum value */
           1                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_county_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_COUNTY_500K',        /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MED_PCT_NOT_IN_POV_QUIN',       /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(med_pct_not_in_pov_quin),    /* Minimum value */
       MAX(med_pct_not_in_pov_quin),    /* Maximum value */
           1                                                            /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_county_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_COUNTY_500K',        /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MED_PCT_NOT_IN_POV_0_17_QUIN',  /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(med_pct_not_in_pov_0_17_quin),       /* Minimum value */
       MAX(med_pct_not_in_pov_0_17_quin),       /* Maximum value */
           1                                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_county_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_COUNTY_500K',        /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MED_PCT_NOT_IN_POV_5_17R_QUIN',         /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(med_pct_not_in_pov_5_17r_quin),      /* Minimum value */
       MAX(med_pct_not_in_pov_5_17r_quin),      /* Maximum value */
           1                                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_county_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_COUNTY_500K',        /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'PCT_WHITE_QUINTILE',            /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(pct_white_quintile),         /* Minimum value */
       MAX(pct_white_quintile),         /* Maximum value */
           1                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_county_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_COUNTY_500K',        /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'PCT_BLACK_QUINTILE',            /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(pct_black_quintile),         /* Minimum value */
       MAX(pct_black_quintile),         /* Maximum value */
           1                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_county_500k;
INSERT 0 1

INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_STATE_500K',         /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MEDIAN_HH_INCOME_QUIN',         /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(median_hh_income_quin),      /* Minimum value */
       MAX(median_hh_income_quin),      /* Maximum value */
           1                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_state_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_STATE_500K',         /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MED_PCT_NOT_IN_POV_QUIN',       /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(med_pct_not_in_pov_quin),    /* Minimum value */
       MAX(med_pct_not_in_pov_quin),    /* Maximum value */
           1                                                            /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_state_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_STATE_500K',         /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MED_PCT_NOT_IN_POV_0_17_QUIN',  /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(med_pct_not_in_pov_0_17_quin),       /* Minimum value */
       MAX(med_pct_not_in_pov_0_17_quin),       /* Maximum value */
           1                                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_state_500k;
INSERT 0 1
INSERT INTO rif40_covariates(geography, geolevel_name, covariate_name, min, max, type)
SELECT 'USA_2014',                                      /* Geography (e.g EW2001) */
       'CB_2014_US_STATE_500K',         /* Name of geolevel. This will be a column name in the numerator/denominator tables */
       'MED_PCT_NOT_IN_POV_5_17R_QUIN',         /* Covariate name. This will be a column name in RIF40_GEOLEVELS.COVARIATE_TABLE */
       MIN(med_pct_not_in_pov_5_17r_quin),      /* Minimum value */
       MAX(med_pct_not_in_pov_5_17r_quin),      /* Maximum value */
           1                                                                    /* Type: integer score */
  FROM rif_data.cov_cb_2014_us_state_500k;
INSERT 0 1

--
-- End transaction (COMMIT)
--
END;
COMMIT

--
-- Eof

--
-- Analyse tables
--
ANALYZE VERBOSE rif_data.seer_cancer;
psql:pg_rif40_load_seer.sql:70: WARNING:  skipping "seer_cancer" --- cannot analyze non-tables or special system tables
ANALYZE
ANALYZE VERBOSE rif_data.seer_population;
psql:pg_rif40_load_seer.sql:71: INFO:  analyzing "rif_data.seer_population"
psql:pg_rif40_load_seer.sql:71: INFO:  "seer_population": scanned 5123 of 5123 pages, containing 614360 live rows and 0 dead rows; 30000 rows in sample, 614360 estimated total rows
ANALYZE
ANALYZE VERBOSE rif_data.cov_cb_2014_us_county_500k;
psql:pg_rif40_load_seer.sql:72: INFO:  analyzing "rif_data.cov_cb_2014_us_county_500k"
psql:pg_rif40_load_seer.sql:72: INFO:  "cov_cb_2014_us_county_500k": scanned 1745 of 1745 pages, containing 132553 live rows and 0 dead rows; 30000 rows in sample, 132553 estimated total rows
ANALYZE
ANALYZE VERBOSE rif_data.cov_cb_2014_us_state_500k;
psql:pg_rif40_load_seer.sql:73: INFO:  analyzing "rif_data.cov_cb_2014_us_state_500k"
psql:pg_rif40_load_seer.sql:73: INFO:  "cov_cb_2014_us_state_500k": scanned 29 of 29 pages, containing 2296 live rows and 0 dead rows; 2296 rows in sample, 2296 estimated total rows
ANALYZE

--
-- Eof