Skip to main content

Course set solutions – beginners course - basic

The script below shows how to solve the tasks in the course set used for the basic introductory course that Sikt and SSB have run previously. Click here for more about our courses.

 // Building up a dataset

// Make the datastore no.ssb.fdb:23 available under the alias ds
require no.ssb.fdb:23 as ds

// Create the dataset totalpop; the imports and commands below work on it
create-dataset totalpop

// Derive age relative to 2020 from the birth year/month value.
// int(x/100) drops the last two digits (assumes a YYYYMM layout - TODO confirm).
import ds/BEFOLKNING_FOEDSELS_AAR_MND as birth_year_month
generate age = 2020 - int(birth_year_month/100)
histogram age, discrete

// Keep only persons whose registration status dated 2020-01-01 has code '1'
// (presumably the code for residents - confirm against the variable's codelist)
import ds/BEFOLKNING_STATUSKODE 2020-01-01 as regstat
keep if regstat == '1'

// Look at the age distribution again after the status filter
histogram age, discrete
summarize age

// Restrict to ages strictly between 30 and 50, i.e. 31-49 for whole-year ages
keep if age > 30 & age < 50

// Background variables: gender, country of birth, education code and municipality of residence
import ds/BEFOLKNING_KJOENN as gender
import ds/BEFOLKNING_FODELAND as country
import ds/NUDB_BU 2020-08-31 as education
import ds/BEFOLKNING_KOMMNR_FAKTISK 2020-01-01 as residence
// The same variable INNTEKT_WLONN imported once per year-end date 2015-2019
import ds/INNTEKT_WLONN 2015-12-31 as salary15
import ds/INNTEKT_WLONN 2016-12-31 as salary16
import ds/INNTEKT_WLONN 2017-12-31 as salary17
import ds/INNTEKT_WLONN 2018-12-31 as salary18
import ds/INNTEKT_WLONN 2019-12-31 as salary19

// Running descriptive statistics

// Unidimensional
// One-way table of gender with the cellpct option
tabulate gender, cellpct

// Summary statistics for the five salary variables, first plain and then with the gini option
summarize salary15 salary16 salary17 salary18 salary19
summarize salary15 salary16 salary17 salary18 salary19, gini

// Bar charts of the same five variables using three different statistics: mean, count and median
barchart (mean) salary15 salary16 salary17 salary18 salary19
barchart (count) salary15 salary16 salary17 salary18 salary19
barchart (median) salary15 salary16 salary17 salary18 salary19

// Histograms of the 2019 salary with different options: freq, freq with normal
// (presumably overlays a normal curve - confirm), and bin() set to 10 and 4
histogram salary19, freq
histogram salary19, freq normal
histogram salary19, bin(10)
histogram salary19, bin(4)

// Two-dimensional

// Dummy: 1 when the country-of-birth code is '000', otherwise 0
// (persons with any other or no country value keep the initial 0)
generate norwegian = 0
replace norwegian = 1 if country == '000'
tabulate norwegian
tabulate norwegian, cellpct
piechart norwegian

// Education level = first character of the education code
generate edu_level = substr(education,1,1)
tabulate edu_level, cellpct

// Two-way tables with the rowpct option
tabulate norwegian gender, rowpct
tabulate edu_level gender, rowpct
tabulate edu_level norwegian, rowpct

// edu_level was built with substr(); destring it so the numeric comparisons below can be used
destring edu_level
// Salary summaries for the lowest levels (below 2) and the highest levels (above 6).
// NOTE(review): "edu_level > 6" also matches level 9, which the edulabel definition
// later in this script names 'Unknown' - confirm that this is intended.
summarize salary15 salary16 salary17 salary18 salary19 if edu_level < 2
summarize salary15 salary16 salary17 salary18 salary19 if edu_level > 6

// Tables using the summarize(salary19) option, by one and by two grouping variables
tabulate gender, summarize(salary19)
tabulate norwegian, summarize(salary19)
tabulate edu_level, summarize(salary19)
tabulate edu_level gender, summarize(salary19)

// Mean salary per year, split with over() by gender, norwegian and edu_level
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(gender)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(norwegian)
barchart (mean) salary15 salary16 salary17 salary18 salary19, over(edu_level)

// Histograms of the 2019 salary split with by() on gender and norwegian
histogram salary19, by(gender)
histogram salary19, by(norwegian)

// Using labels in tables

// Define a label set for the values 0-9 of edu_level, attach it to the variable,
// and rerun the edu_level by gender table now that the labels are attached
define-labels edulabel 0 'No education' 1 'Elementary school' 2 'Middle school' 3 'High school' 4 'High school - final year' 5 'Supplementary to high school' 6 'Higher education - lower level' 7 'Higher education - higher level' 8 'Research education' 9 Unknown
assign-labels edu_level edulabel
tabulate edu_level gender

// Creating own divisions
// Each replace overrides the previous value for the levels it matches, so for
// levels 0-9 the end result is: 0-5 -> 1, 6 -> 2, 7-8 -> 3, 9 -> 9
generate edu_group = 1 if edu_level >= 0
replace edu_group = 2 if edu_level >= 6
replace edu_group = 3 if edu_level >= 7
replace edu_group = 9 if edu_level == 9

// Label the groups, then tabulate against gender without and with the missing option
// (presumably "missing" adds observations lacking a value to the table - confirm)
define-labels edulabel2 1 Low 2 Medium 3 High 9 Unknown
assign-labels edu_group edulabel2
tabulate edu_group gender, rowpct freq
tabulate edu_group gender, rowpct freq missing

// Finally, run a simple regression
// The dummy variables below are used as regressors together with age and norwegian.

// male: 1 for gender code '1', 0 for everyone else
generate male = 0
replace male = 1 if gender == '1'

// oslo: 1 when the municipality code is '0301', 0 for any other code
generate oslo = 1 if residence == '0301'
replace oslo = 0 if residence != '0301'
tabulate oslo, cellpct

// high_edu: 1 for education levels 7-8, 0 for levels 0-6.
// Level 9 is labelled 'Unknown' in edulabel, so the upper bound keeps it out of the
// "high" group; it matches neither condition and gets no high_edu value
// (presumably such observations are then left out of the regression - confirm).
generate high_edu = 1 if edu_level >= 7 & edu_level < 9
replace high_edu = 0 if edu_level >= 0 & edu_level < 7

// Regress the 2019 salary on age and the four dummies
regress salary19 age norwegian oslo male high_edu