Skip to main content

Course set solutions – thematic course on job data from the A-scheme

The script below shows how to work with job data from the A-scheme. It was covered in our thematic course, which was run twice in 2022. The thematic course also demonstrates how to work with data about ongoing studies; see the separate examples for this: Example 1 and Example 2.

Click here for more about our courses.

 textblock
Thematic Course: How to work with job data
------------------------------------
endblock

// Bind the data source no.ssb.fdb:23 to the alias db.
// The import statements in this script reference variables as db/<VARIABLE_NAME>.
require no.ssb.fdb:23 as db

textblock
1) How to convert data from job to individual level - numerical information
----------------------------------------------------------------------------
endblock

// Person-level dataset: one set of person variables (gender, age) read at 2021-07-16
create-dataset employed
import db/ARBLONN_PERS_KJOENN 2021-07-16 as gender
import db/ARBLONN_PERS_ALDER 2021-07-16 as age

// Job-level dataset: working hours and position percentage per employment relationship,
// plus person_id, which is used below as the key for aggregating jobs per person
create-dataset employment_relationships
import db/ARBLONN_ARB_ARBEIDSTID 2021-07-16 as working_hours
import db/ARBLONN_ARB_STILLINGSPST 2021-07-16 as position_percent
import db/ARBEIDSFORHOLD_PERSON as person_id

// Go from job level to person level: sum working hours and position percentage over all jobs
// belonging to the same person_id, then merge the per-person totals into the employed dataset
collapse (sum) working_hours position_percent, by(person_id)
merge working_hours position_percent into employed

// Job statistics at person level (switch the active dataset to employed first)
use employed

// Overall summary, then working hours and position percentage summarised within each gender
summarize working_hours position_percent
tabulate gender, summarize(working_hours)
tabulate gender, summarize(position_percent)

// Build a four-level age group. Everyone starts in group 1; each replace moves
// persons above the given age into the next group, so the order of the replaces matters.
generate age_group = 1
replace age_group = 2 if age > 25
replace age_group = 3 if age > 40
replace age_group = 4 if age > 60
// Label set age_label is reused later in the script (section 3)
define-labels age_label 1 '0-25' 2 '26-40' 3 '41-60' 4 '61->'
assign-labels age_group age_label
// Same summaries as above, now broken down by age group and gender
tabulate age_group gender, summarize(working_hours)
tabulate age_group gender, summarize(position_percent)


textblock
2) How to convert data from job to individual level - categorical information
------------------------------------------------------------------------------
endblock

// Person-level dataset: persons whose municipality code starts with '46'
// (the dataset is named vestland), with gender.
// All variables in this section are read at the same reference date (2021-07-16)
// so that person and job information describe the same point in time.
create-dataset vestland
import db/ARBLONN_PERS_KOMMNR 2021-07-16 as residence
keep if substr(residence,1,2) == '46'
import db/ARBLONN_PERS_KJOENN 2021-07-16 as gender

// Job-level dataset restricted to full-time jobs (full_part == '1')
create-dataset full_time
import db/ARBLONN_ARB_HELDELTID 2021-07-16 as full_part
import db/ARBLONN_ARB_ARBEIDSTID 2021-07-16 as working_hours
import db/ARBEIDSFORHOLD_PERSON as person_id
tabulate full_part
keep if full_part == '1'
// After the filter every row has code '1'; converted to the number 1, the sum per person
// becomes the number of full-time jobs that person holds
destring full_part
collapse(sum) full_part working_hours, by(person_id)
// Rename before merging so the full-time and part-time results do not collide in vestland
rename full_part full
rename working_hours working_hours_full
merge full working_hours_full into vestland

// Job-level dataset restricted to part-time jobs (full_part == '2')
create-dataset part_time
import db/ARBLONN_ARB_HELDELTID 2021-07-16 as full_part
import db/ARBLONN_ARB_ARBEIDSTID 2021-07-16 as working_hours
import db/ARBEIDSFORHOLD_PERSON as person_id
keep if full_part == '2'
destring full_part
// Every remaining row has the value 2; dividing by 2 turns it into 1 so that the sum
// per person counts part-time jobs instead of adding up the code value
replace full_part = full_part/2
collapse(sum) full_part working_hours, by(person_id)
rename full_part part
rename working_hours working_hours_part
merge part working_hours_part into vestland

// Person-level statistics on number of full-time / part-time jobs and the associated working hours
use vestland
tabulate full
tabulate part
tabulate part gender, rowpct freq
tabulate part, summarize(working_hours_part)
tabulate gender, summarize(working_hours_part)
tabulate gender, summarize(working_hours_full)


textblock
3) Case about shift schedules
-----------------------------------------------
endblock

// Person-level dataset: gender and age read at 2021-07-16
create-dataset employed2
import db/ARBLONN_PERS_KJOENN 2021-07-16 as gender
import db/ARBLONN_PERS_ALDER 2021-07-16 as age

// Job-level dataset: work schedule and working hours per employment relationship,
// plus person_id as the key for aggregating jobs per person
create-dataset employment_relationships2
import db/ARBLONN_ARB_TID_ORDNING 2021-07-16 as work_schedule
import db/ARBLONN_ARB_ARBEIDSTID 2021-07-16 as working_hours
import db/ARBEIDSFORHOLD_PERSON as person_id

// Inspect the distribution of work-schedule codes before filtering
tabulate work_schedule

// Keep only jobs with work schedule code 'dogn355', give each such job the value 1 (num_shifts),
// and aggregate to person level: num_shifts becomes the number of such jobs per person and
// working_hours the total hours in those jobs. Then merge the result into employed2.
keep if work_schedule == 'dogn355'
generate num_shifts = 1
collapse (sum) num_shifts working_hours, by(person_id)
merge num_shifts working_hours into employed2

// Shift statistics at person level
use employed2
tabulate num_shifts, summarize(working_hours) mean freq

// Indicator: 1 for persons with at least one 'dogn355' job, 0 otherwise.
// NOTE(review): persons without any such job presumably have num_shifts missing after the merge
// and therefore keep the initial value 0 - confirm.
generate shift = 0
replace shift = 1 if num_shifts >= 1

// Working hours for persons with shift == 1 only
summarize working_hours if shift

tabulate shift, cellpct freq
tabulate gender shift, rowpct freq
tabulate gender if shift, summarize(working_hours)

// Same four-level age grouping as in section 1; the order of the replaces matters
generate age_group = 1
replace age_group = 2 if age > 25
replace age_group = 3 if age > 40
replace age_group = 4 if age > 60
// Reuses the label set age_label created with define-labels in section 1
assign-labels age_group age_label

tabulate age_group shift, rowpct freq
tabulate age_group if shift, summarize(working_hours)


textblock
4) Data at job level: How to link individual data with job data 
-----------------------------------------------------------------
endblock

// Job-level dataset: occupation, working hours and monthly salary (full-time equivalent)
// per employment relationship, plus person_id as the link to person-level data.
// All job variables are read at the same reference date (2021-07-16) so that salary,
// hours and occupation describe the same jobs at the same point in time.
create-dataset jobs
import db/ARBLONN_ARB_YRKE_STYRK08 2021-07-16 as occupation
import db/ARBLONN_ARB_ARBEIDSTID 2021-07-16 as working_hours
import db/ARBLONN_LONN_EKV_IALT 2021-07-16 as monthly_salary_full_time_equiv
import db/ARBEIDSFORHOLD_PERSON as person_id

// Person-level dataset holding gender. Here the data stays at job level: gender is copied
// onto every job of the person by merging on the key person_id in the jobs dataset.
create-dataset individuals
import db/ARBLONN_PERS_KJOENN 2021-07-16 as gender
merge gender into jobs on person_id

// Job-level statistics: salary and working hours summarised by occupation and gender
use jobs
tabulate occupation gender, summarize(monthly_salary_full_time_equiv)
tabulate occupation gender, summarize(working_hours)


textblock
5) Case with outer join to include individuals who start jobs during the observation period
------------------------------------------------------------------------------------------
endblock

// Import working hours at three consecutive monthly measurement dates (January-March 2021).
// The first import defines the initial set of units in the dataset; the second and third use
// the outer_join option.
// NOTE(review): per the section heading, outer_join is what keeps units that first appear at the
// later dates instead of dropping them - confirm against the import command reference.
create-dataset all_jobs
import db/ARBLONN_ARB_ARBEIDSTID 2021-01-16 as working_hours2101
import db/ARBLONN_ARB_ARBEIDSTID 2021-02-16 as working_hours2102, outer_join
import db/ARBLONN_ARB_ARBEIDSTID 2021-03-16 as working_hours2103, outer_join

// Summary of all three variables; the observation counts show how many units have a value at each date
summarize