Related post:
https://stharrold.github.io/20160110-etl-census-with-python.html
Data documentation:
https://www.census.gov/programs-surveys/acs/technical-documentation/pums/documentation.2013.html
cd ~
/home/samuel_harrold
# Import standard packages.
import collections
import functools
import os
import pdb # Debug with pdb.
import subprocess
import sys
import time
# Import installed packages.
import numpy as np
import pandas as pd
# Import local packages.
# Insert the `dsdemos` subdirectory of the current directory into the module search path.
# Autoreload local packages after editing.
# `dsdemos` version: https://github.com/stharrold/dsdemos/releases/tag/v0.0.3
sys.path.insert(0, os.path.join(os.path.curdir, r'dsdemos'))
%reload_ext autoreload
%autoreload 2
import dsdemos as dsd
# Record when this notebook was run (UTC) and which interpreter/library
# versions produced the outputs below.
timestamp_utc = time.strftime(r'%Y-%m-%dT%H:%M:%S%Z', time.gmtime())
print("Timestamp:")
print(timestamp_utc)
print()
print("Versions:")
for (label, version) in [
        ("Python:", sys.version_info),
        ("numpy:", np.__version__),
        ("pandas:", pd.__version__)]:
    print(label, version)
Timestamp: 2016-02-08T04:30:52GMT Versions: Python: sys.version_info(major=3, minor=5, micro=1, releaselevel='final', serial=0) numpy: 1.10.2 pandas: 0.17.1
File sources:
# File paths.
# Blog-post assets are located under the user's home directory.
basename = r'20160110-etl-census-with-python'
filename = basename
path_static = os.path.join(os.path.expanduser(r'~'), r'stharrold.github.io/content/static')
path_ipynb = os.path.join(path_static, basename, '{fname}.ipynb'.format(fname=filename))
# Census files are located under a mounted disk that mirrors the census.gov layout.
path_disk = os.path.abspath(r'/mnt/disk-20151227t211000z/')
path_acs = os.path.join(path_disk, r'www2-census-gov/programs-surveys/acs/')
(path_pcsv, path_hcsv, path_ecsv, path_dtxt) = (
    os.path.join(path_acs, relpath) for relpath in (
        r'data/pums/2013/5-Year/ss13pdc.csv',  # 'pdc' = 'person DC'
        r'data/pums/2013/5-Year/ss13hdc.csv',  # 'hdc' = 'housing DC'
        r'tech_docs/pums/estimates/pums_estimates_9_13.csv',
        r'tech_docs/pums/data_dict/PUMS_Data_Dictionary_2009-2013.txt'))
# Weights.
# Each weight column has 80 numbered companions: <name>1 ... <name>80.
pwt = 'PWGTP' # person weight
hwt = 'WGTP' # housing weight
pwts = ['{wt}{num}'.format(wt=pwt, num=num) for num in range(1, 81)]
hwts = ['{wt}{num}'.format(wt=hwt, num=num) for num in range(1, 81)]
print("`ddict`: Load the data dictionary and display the hierarchical structure.")
# Only `ddict` is used below.
# The hierarchical data frame is only for display.
ddict = dsd.census.parse_pumsdatadict(path=path_dtxt)
# Build one data frame per record type, with one row per variable name
# (`orient='index'` turns each top-level key into a row label).
tmp = dict()
for record_type in ddict['record_types']:
    tmp[record_type] = pd.DataFrame.from_dict(
        ddict['record_types'][record_type], orient='index')
# Stack the per-record-type frames under a (record_type, var_name) row index
# and show the first rows as the cell's output.
pd.concat(tmp, names=['record_type', 'var_name']).head()
`ddict`: Load the data dictionary and display the hierarchical structure.
| length | description | var_codes | notes | ||
|---|---|---|---|---|---|
| record_type | var_name | ||||
| HOUSING RECORD | ACR | 1 | Lot size | {'b': 'N/A (GQ/not a one-family house or mobil... | NaN |
| ADJHSG | 7 | Adjustment factor for housing dollar amounts (... | {'1086032': '2009 factor', '1068395': '2010 fa... | [Note: The values of ADJHSG inflation-adjusts ... | |
| ADJINC | 7 | Adjustment factor for income and earnings doll... | {'1085467': '2009 factor (0.999480 * 1.0860317... | [Note: The values of ADJINC inflation-adjusts ... | |
| AGS | 1 | Sales of Agriculture Products (Yearly sales) | {'b': 'N/A (GQ/vacant/not a one-family house o... | [Note: No adjustment factor is applied to AGS.] | |
| BATH | 1 | Bathtub or shower | {'b': 'N/A (GQ)', '1': 'Yes', '2': 'No'} | NaN |
# Preview the notes stored under `ddict['notes']`; the bare expression on
# the last line is what the notebook displays as the cell's output.
print("`ddict`: First 10 unstructured notes from end of file.")
ddict['notes'][0:10]
`ddict`: First 10 unstructured notes from end of file.
['* In cases where the SOC occupation code ends in X(s) or Y(s), two or more SOC', 'occupation codes were aggregated to correspond to a specific Census occupation', 'code. In these cases, the Census occupation description is used for the SOC', 'occupation title."', '** These codes are pseudo codes developed by the Census Bureau and are not', ' official or equivalent NAICS or SOC codes.', 'Legend to Identify NAICS Equivalents', ' M = Multiple NAICS codes', ' P = Part of a NAICS code - NAICS code split between two or more Census', ' codes']
print("`dfp`, `dfh`: Load person and housing records.")
time_start = time.perf_counter()
# Report the on-disk size, line count, and column count of each CSV file
# before parsing it.
for path in [path_pcsv, path_hcsv]:
    # Single pass over the file: the first line supplies the column count and
    # the remaining lines are only counted, never held in memory.
    with open(path) as fobj:
        first_line = fobj.readline()
        nlines = (1 if first_line else 0) + sum(1 for _ in fobj)
    # Column count assumes the first line contains no quoted commas.
    ncols = first_line.count(',')+1
    print("{path}:".format(path=path))
    print(" size (MB) = {size:.1f}".format(size=os.path.getsize(path)/1e6))
    print(" num lines = {nlines}".format(nlines=nlines))
    print(" num columns = {ncols}".format(ncols=ncols))
    print()
# For ss13pdc.csv, low_memory=False since otherwise pandas raises DtypeWarning.
dfp = pd.read_csv(path_pcsv, low_memory=False)
dfh = pd.read_csv(path_hcsv, low_memory=True)
# Report the in-memory footprint of each loaded data frame.
for (name, df) in [('dfp', dfp), ('dfh', dfh)]:
    print("{name} RAM usage (MB) = {mem:.1f}".format(
        name=name, mem=df.memory_usage().sum()/1e6))
time_stop = time.perf_counter()
print()
print("Time elapsed (sec) = {diff:.1f}".format(diff=time_stop-time_start))
`dfp`, `dfh`: Load person and housing records.
/mnt/disk-20151227t211000z/www2-census-gov/programs-surveys/acs/data/pums/2013/5-Year/ss13pdc.csv:
size (MB) = 30.5
num lines = 30560
num columns = 295
/mnt/disk-20151227t211000z/www2-census-gov/programs-surveys/acs/data/pums/2013/5-Year/ss13hdc.csv:
size (MB) = 13.5
num lines = 17501
num columns = 205
dfp RAM usage (MB) = 72.1
dfh RAM usage (MB) = 28.7
Time elapsed (sec) = 2.0
# Preview the person-level data frame; the bare expression on the last line
# is what the notebook displays as the cell's output.
print("`dfp`: First 5 person records.")
dfp.head(n=5)
`dfp`: First 5 person records.
| RT | SERIALNO | SPORDER | PUMA00 | PUMA10 | ST | ADJINC | PWGTP | AGEP | CIT | ... | PWGTP71 | PWGTP72 | PWGTP73 | PWGTP74 | PWGTP75 | PWGTP76 | PWGTP77 | PWGTP78 | PWGTP79 | PWGTP80 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | P | 2009000000403 | 1 | 102 | -9 | 11 | 1085467 | 20 | 38 | 1 | ... | 6 | 26 | 31 | 32 | 26 | 6 | 36 | 6 | 19 | 20 |
| 1 | P | 2009000001113 | 1 | 103 | -9 | 11 | 1085467 | 13 | 78 | 1 | ... | 13 | 30 | 12 | 13 | 4 | 4 | 18 | 24 | 4 | 21 |
| 2 | P | 2009000001113 | 2 | 103 | -9 | 11 | 1085467 | 25 | 39 | 1 | ... | 26 | 50 | 23 | 20 | 8 | 7 | 38 | 41 | 7 | 37 |
| 3 | P | 2009000001113 | 3 | 103 | -9 | 11 | 1085467 | 17 | 8 | 1 | ... | 15 | 32 | 17 | 15 | 6 | 4 | 26 | 32 | 5 | 30 |
| 4 | P | 2009000001978 | 1 | 103 | -9 | 11 | 1085467 | 37 | 53 | 1 | ... | 65 | 12 | 13 | 37 | 36 | 41 | 57 | 36 | 11 | 33 |
5 rows × 295 columns
# Preview the housing-level data frame. The label names `dfh`, the frame that
# is actually displayed on the next line.
print("`dfh`: First 5 housing records.")
dfh.head()
`dfp`: First 5 housing records.
| insp | RT | SERIALNO | DIVISION | PUMA00 | PUMA10 | REGION | ST | ADJHSG | ADJINC | ... | WGTP71 | WGTP72 | WGTP73 | WGTP74 | WGTP75 | WGTP76 | WGTP77 | WGTP78 | WGTP79 | WGTP80 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 600 | H | 2009000000403 | 5 | 102 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 6 | 25 | 30 | 32 | 26 | 6 | 36 | 6 | 18 | 19 |
| 1 | NaN | H | 2009000001113 | 5 | 103 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 14 | 29 | 12 | 12 | 4 | 4 | 18 | 23 | 4 | 22 |
| 2 | 480 | H | 2009000001978 | 5 | 103 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 65 | 12 | 14 | 37 | 36 | 41 | 57 | 36 | 11 | 34 |
| 3 | NaN | H | 2009000002250 | 5 | 105 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 4 | 4 | 4 | 4 | 23 | 14 | 11 | 4 | 20 | 21 |
| 4 | 2500 | H | 2009000002985 | 5 | 101 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 66 | 45 | 10 | 35 | 34 | 10 | 34 | 55 | 50 | 10 |
5 rows × 205 columns
print(
    r"""`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags.
Printed format:
[PERSON, HOUSING] RECORD
COL: Column name.
Column description.
Multi-line optional column notes.
1-3 line description of value meanings ('variable codes').
Multi-line statistical description and data type.
...
num columns described = ncols""")
print()
# Map each data-dictionary record type to its data frame and weight columns.
records_dfs = collections.OrderedDict([
    ('PERSON RECORD', {'dataframe': dfp, 'weight': pwt, 'replicate_weights': pwts}),
    ('HOUSING RECORD', {'dataframe': dfh, 'weight': hwt, 'replicate_weights': hwts})])
for record_type in records_dfs:
    print(record_type)
    df = records_dfs[record_type]['dataframe']
    # Data-dictionary entries for this record type, looked up once per record.
    record_vars = ddict['record_types'][record_type]
    ncols_desc = 0 # number of columns described
    for col in df.columns:
        if col in record_vars:
            col_dict = record_vars[col]
            desc = col_dict['description']
        else:
            col_dict = None
            desc = 'Column not in data dictionary.'
        # Skip flag columns (name starts with 'F' and the description ends
        # with ' flag' or ' edit') and replicate-weight columns (name contains
        # 'WGTP' and the description mentions "Weight replicate").
        is_flag = col.startswith('F') and desc.endswith((' flag', ' edit'))
        is_replicate_weight = 'WGTP' in col and "Weight replicate" in desc
        if is_flag or is_replicate_weight:
            continue
        print("{col}: {desc}".format(col=col, desc=desc))
        ncols_desc += 1
        if col_dict is not None:
            if 'notes' in col_dict:
                print(" {notes}".format(notes=col_dict['notes']))
            # Show at most the first three variable codes, then an ellipsis.
            for (inum, var_code) in enumerate(col_dict['var_codes']):
                var_code_desc = col_dict['var_codes'][var_code]
                print(" {vc}: {vcd}".format(vc=var_code, vcd=var_code_desc))
                if inum >= 2:
                    print(" ...")
                    break
        # Prefix every line of the statistical summary with the same leading
        # space as the lines printed above.
        print(' '+repr(df[col].describe()).replace('\n', '\n '))
    print("num columns described = {ncd}".format(ncd=ncols_desc))
    print()
`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags.
Printed format:
[PERSON, HOUSING] RECORD
COL: Column name.
Column description.
Multi-line optional column notes.
1-3 line description of value meanings ('variable codes').
Multi-line statistical description and data type.
...
num columns described = ncols
PERSON RECORD
RT: Record Type
P: Person Record
count 30559
unique 1
top P
freq 30559
Name: RT, dtype: object
SERIALNO: Housing unit/GQ person serial number
200900000001..201399999999: Unique identifier
count 3.055900e+04
mean 2.011081e+12
std 1.407751e+09
min 2.009000e+12
25% 2.010000e+12
50% 2.011001e+12
75% 2.012001e+12
max 2.013001e+12
Name: SERIALNO, dtype: float64
SPORDER: Person number
01..20: Person number
count 30559.000000
mean 1.850584
std 1.235291
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 13.000000
Name: SPORDER, dtype: float64
PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10.
['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']
00100..08200: Public use microdata area codes
7777: combination of 01801, 01802, and 01905 in Louisiana
-0009: Code classification is Not Applicable for data collected in 2012 or later
...
count 30559.000000
mean 55.840243
std 55.336541
min -9.000000
25% -9.000000
50% 101.000000
75% 103.000000
max 105.000000
Name: PUMA00, dtype: float64
PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data Collected in 2012 or later. Use in combination with PUMA00.
['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']
00100..70301: Public use microdata area codes
-0009: Code classification is Not Applicable for data collected prior to 2012
count 30559.000000
mean 38.259923
std 55.395391
min -9.000000
25% -9.000000
50% -9.000000
75% 103.000000
max 105.000000
Name: PUMA10, dtype: float64
ST: State Code
01: Alabama/AL
02: Alaska/AK
04: Arizona/AZ
...
count 30559
mean 11
std 0
min 11
25% 11
50% 11
75% 11
max 11
Name: ST, dtype: float64
ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places)
['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars. ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.']
1085467: 2009 factor (0.999480 * 1.08603175)
1076540: 2010 factor (1.007624 * 1.06839475)
1054614: 2011 factor (1.018237 * 1.03572510)
...
count 30559.000000
mean 1048186.138192
std 29716.696630
min 1007549.000000
25% 1024887.000000
50% 1054614.000000
75% 1076540.000000
max 1085467.000000
Name: ADJINC, dtype: float64
PWGTP: Person's weight
00001..09999: Integer weight of person
count 30559.000000
mean 20.268039
std 13.310075
min 1.000000
25% 12.000000
50% 16.000000
75% 24.000000
max 173.000000
Name: PWGTP, dtype: float64
AGEP: Age
00: Under 1 year
01..99: 1 to 99 years (Top-coded***)
count 30559.000000
mean 38.728198
std 21.780122
min 0.000000
25% 23.000000
50% 35.000000
75% 55.000000
max 95.000000
Name: AGEP, dtype: float64
CIT: Citizenship status
1: Born in the U.S.
2: Born in Puerto Rico, Guam, the U.S. Virgin Islands, or the Northern Marianas
3: Born abroad of American parent(s)
...
count 30559.000000
mean 1.471252
std 1.201267
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 5.000000
Name: CIT, dtype: float64
CITWP05: Year of naturalization write-in for data collected prior to 2012
bbbb: Not eligible - not naturalized
1925: 1925 or earlier (Bottom-coded)
1926: 1926 - 1930
...
count 1595.000000
mean 1110.813166
std 994.495059
min -9.000000
25% -9.000000
50% 1973.000000
75% 1999.000000
max 2011.000000
Name: CITWP05, dtype: float64
CITWP12: Year of naturalization write-in for data collected in 2012 or later
bbbb: Not eligible - not naturalized
1928: 1928 or earlier (Bottom-coded)
1929: 1929 - 1933
...
count 1595.000000
mean 875.462696
std 996.639591
min -9.000000
25% -9.000000
50% -9.000000
75% 1999.000000
max 2013.000000
Name: CITWP12, dtype: float64
COW: Class of worker
b: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
1: Employee of a private for-profit company or business, or of an individual, for wages, salary, or commissions
2: Employee of a private not-for-profit, tax-exempt, or charitable organization
...
count 20557.000000
mean 2.592937
std 1.971727
min 1.000000
25% 1.000000
50% 2.000000
75% 5.000000
max 9.000000
Name: COW, dtype: float64
DDRS: Self-care difficulty
b: N/A (Less than 5 years old)
1: Yes
2: No
...
count 29078.000000
mean 1.969668
std 0.171503
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DDRS, dtype: float64
DEAR: Hearing difficulty
1: Yes
2: No
count 30559.000000
mean 1.978010
std 0.146654
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DEAR, dtype: float64
DEYE: Vision difficulty
1: Yes
2: No
count 30559.000000
mean 1.975163
std 0.155631
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DEYE, dtype: float64
DOUT: Independent living difficulty
b: N/A (Less than 15 years old)
1: Yes
2: No
...
count 26658.000000
mean 1.943432
std 0.231020
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DOUT, dtype: float64
DPHY: Ambulatory difficulty
b: N/A (Less than 5 years old)
1: Yes
2: No
...
count 29078.000000
mean 1.922966
std 0.266650
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DPHY, dtype: float64
DRAT: Veteran service connected disability rating (percentage)
b: N/A (No service-connected disability/never served in military)
1: 0 percent
2: 10 or 20 percent
...
count 287.000000
mean 3.400697
std 1.587671
min 1.000000
25% 2.000000
50% 3.000000
75% 5.000000
max 6.000000
Name: DRAT, dtype: float64
DRATX: Veteran service connected disability rating (checkbox)
b: N/A (Less than 17 years old/never served in military)
1: Yes
2: No
...
count 2072.000000
mean 1.861486
std 0.345522
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DRATX, dtype: float64
DREM: Cognitive difficulty
b: N/A (Less than 5 years old)
1: Yes
2: No
...
count 29078.000000
mean 1.943015
std 0.231817
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DREM, dtype: float64
ENG: Ability to speak English
b: N/A (less than 5 years old/speaks only English)
1: Very well
2: Well
...
count 4231.000000
mean 1.429449
std 0.750207
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 4.000000
Name: ENG, dtype: float64
FER: Gave birth to child within the past 12 months
['NOTE: Problems in the collection of data on women who gave birth in the past year (FER) in 2012 led to suppressing this variable in 59 PUMAs within states Florida, Georgia, Kansas, Montana, North Carolina, Ohio and Texas. This only affects 2012 vintage data. See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using FER. http://www.census.gov/acs/www/data_documentation/pums_documentation/']
b: N/A (less than 15 years/greater than 50 years/ male)
1: Yes
2: No
...
count 9036.000000
mean 1.955069
std 0.207165
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: FER, dtype: float64
GCL: Grandparents living with grandchildren
b: N/A (less than 30 years/institutional GQ)
1: Yes
2: No
...
count 18439.000000
mean 1.968599
std 0.174403
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: GCL, dtype: float64
GCM: Length of time responsible for grandchildren
b: N/A (less than 30 years/grandparent not responsible for grandchild/institutional GQ)
1: Less than 6 months
2: 6 to 11 months
...
count 228.000000
mean 4.026316
std 1.078100
min 1.000000
25% 3.000000
50% 4.000000
75% 5.000000
max 5.000000
Name: GCM, dtype: float64
GCR: Grandparents responsible for grandchildren
b: N/A (less than 30 years/institutional GQ/grandparent not living with grandchild)
1: Yes
2: No
...
count 579.000000
mean 1.606218
std 0.489010
min 1.000000
25% 1.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: GCR, dtype: float64
HINS1: Insurance through a current or former employer or union
1: Yes
2: No
count 30559.000000
mean 1.387120
std 0.487099
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 2.000000
Name: HINS1, dtype: float64
HINS2: Insurance purchased directly from an insurance company
1: Yes
2: No
count 30559.000000
mean 1.852548
std 0.354562
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: HINS2, dtype: float64
HINS3: Medicare, for people 65 and older, or people with certain disabilities
1: Yes
2: No
count 30559.000000
mean 1.847999
std 0.359028
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: HINS3, dtype: float64
HINS4: Medicaid, Medical Assistance, or any kind of government-assistance plan for those with low incomes or a disability
1: Yes
2: No
count 30559.000000
mean 1.763016
std 0.425239
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: HINS4, dtype: float64
HINS5: TRICARE or other military health care
1: Yes
2: No
count 30559.000000
mean 1.973461
std 0.160734
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: HINS5, dtype: float64
HINS6: VA (including those who have ever used or enrolled for VA health care)
1: Yes
2: No
count 30559.000000
mean 1.981675
std 0.134127
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: HINS6, dtype: float64
HINS7: Indian Health Service
1: Yes
2: No
count 30559.000000
mean 1.999215
std 0.028014
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: HINS7, dtype: float64
INTP: Interest, dividends, and net rental income past 12 months (signed)
['Note: Use values from ADJINC to adjust INTP to constant dollars.']
bbbbbb: N/A (less than 15 years old)
000000: None
-09999..-00001: Loss $1 to $9999 (Rounded and bottom-coded)
...
count 26658.000000
mean 2798.324368
std 18916.559752
min -7700.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 325000.000000
Name: INTP, dtype: float64
JWMNP: Travel time to work
bbb: N/A (not a worker or worker who worked at home)
001..200: 1 to 200 minutes to get to work (Top-coded)
count 14545.000000
mean 29.764043
std 19.584350
min 1.000000
25% 15.000000
50% 30.000000
75% 40.000000
max 142.000000
Name: JWMNP, dtype: float64
JWRIP: Vehicle occupancy
bb: N/A (not a worker or worker whose means of transportation to work was not car, truck, or van)
01: Drove alone
02: In 2-person carpool
...
count 6211.000000
mean 1.224602
std 0.677173
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 10.000000
Name: JWRIP, dtype: float64
JWTR: Means of transportation to work
bb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job but not at work; Armed Forces, with a job but not at work)
01: Car, truck, or van
02: Bus or trolley bus
...
count 15327.000000
mean 3.863900
std 3.554906
min 1.000000
25% 1.000000
50% 2.000000
75% 4.000000
max 12.000000
Name: JWTR, dtype: float64
LANX: Language other than English spoken at home
b: N/A (less than 5 years old)
1: Yes, speaks another language
2: No, speaks only English
...
count 29078.000000
mean 1.854495
std 0.352616
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: LANX, dtype: float64
MAR: Marital status
1: Married
2: Widowed
3: Divorced
...
count 30559.000000
mean 3.659118
std 1.737333
min 1.000000
25% 1.000000
50% 5.000000
75% 5.000000
max 5.000000
Name: MAR, dtype: float64
MARHD: Divorced in the past 12 months
b: N/A (age less than 15 years; never married)
1: Yes
2: No
...
count 12371.000000
mean 1.982297
std 0.131874
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: MARHD, dtype: float64
MARHM: Married in the past 12 months
b: N/A (age less than 15 years; never married)
1: Yes
2: No
...
count 12371.000000
mean 1.954086
std 0.209307
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: MARHM, dtype: float64
MARHT: Number of times married
b: N/A (age less than 15 years; never married)
1: One time
2: Two times
...
count 12371.000000
mean 1.207259
std 0.461325
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 3.000000
Name: MARHT, dtype: float64
MARHW: Widowed in the past 12 months
b: N/A (age less than 15 years; never married)
1: Yes
2: No
...
count 12371.000000
mean 1.990866
std 0.095140
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: MARHW, dtype: float64
MARHYP05: Year last married for data collected prior to 2012
bbbb: N/A (age less than 15 years; never married)
1928: 1928 or earlier (Bottom-coded)
1929: 1929
...
count 12371.000000
mean 1158.447337
std 982.950909
min -9.000000
25% -9.000000
50% 1963.000000
75% 1994.000000
max 2011.000000
Name: MARHYP05, dtype: float64
MARHYP12: Year last married for data collected in 2012 or later
bbbb: N/A (age less than 15 years; never married)
1932: 1932 or earlier (Bottom-coded)
1933: 1933
...
count 12371.000000
mean 819.956026
std 984.800070
min -9.000000
25% -9.000000
50% -9.000000
75% 1987.000000
max 2013.000000
Name: MARHYP12, dtype: float64
MIG: Mobility status (lived here 1 year ago)
b: N/A (less than 1 year old)
1: Yes, same house (nonmovers)
2: No, outside US and Puerto Rico
...
count 30229.00000
mean 1.37163
std 0.76873
min 1.00000
25% 1.00000
50% 1.00000
75% 1.00000
max 3.00000
Name: MIG, dtype: float64
MIL: Military service
b: N/A (less than 17 years old)
1: Now on active duty
2: On active duty in the past, but not now
...
count 26114.000000
mean 3.842843
std 0.549228
min 1.000000
25% 4.000000
50% 4.000000
75% 4.000000
max 4.000000
Name: MIL, dtype: float64
MLPA: Served September 2001 or later
b: N/A (Less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.224798
std 0.417562
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPA, dtype: float64
MLPB: Served August 1990 - August 2001 (including Persian Gulf War)
b: N/A (Less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.161186
std 0.367802
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPB, dtype: float64
MLPCD: Served May 1975 - July 1990
b: N/A (less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.232345
std 0.422442
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPCD, dtype: float64
MLPE: Served Vietnam era (August 1964 - April 1975)
b: N/A (Less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.290027
std 0.453897
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 1.000000
Name: MLPE, dtype: float64
MLPFG: Served February 1955 - July 1964
b: N/A (less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.145013
std 0.352209
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPFG, dtype: float64
MLPH: Served Korean War (July 1950 - January 1955)
b: N/A (Less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.117520
std 0.322126
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPH, dtype: float64
MLPI: Served January 1947 - June 1950
b: N/A (Less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.017790
std 0.132222
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPI, dtype: float64
MLPJ: Served World War II (December 1941 - December 1946)
b: N/A (Less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.086253
std 0.280814
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPJ, dtype: float64
MLPK: Served November 1941 or earlier
b: N/A (Less than 17 years old/no active duty)
0: Did not serve this period
1: Served this period
...
count 1855.000000
mean 0.003774
std 0.061330
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: MLPK, dtype: float64
NWAB: Temporary absence from work (UNEDITED-See "Employment Status Recode" (ESR))
b: N/A (less than 16 years old/at work/on layoff)
1: Yes
2: No
...
count 26399.000000
mean 2.564074
std 0.529434
min 1.000000
25% 2.000000
50% 3.000000
75% 3.000000
max 3.000000
Name: NWAB, dtype: float64
NWAV: Available for work (UNEDITED-See "Employment Status Recode" (ESR))
b: N/A (less than 16 years/at work/not looking)
1: Yes
2: No, temporarily ill
...
count 26399.000000
mean 4.530702
std 1.224743
min 1.000000
25% 5.000000
50% 5.000000
75% 5.000000
max 5.000000
Name: NWAV, dtype: float64
NWLA: On layoff from work (UNEDITED-See "Employment Status Recode" (ESR))
b: N/A (less than 16 years old/at work)
1: Yes
2: No
...
count 26399.000000
mean 2.540134
std 0.524830
min 1.000000
25% 2.000000
50% 3.000000
75% 3.000000
max 3.000000
Name: NWLA, dtype: float64
NWLK: Looking for work (UNEDITED-See "Employment Status Recode" (ESR))
b: N/A (less than 16 years old/at work/temporarily absent/informed of recall)
1: Yes
2: No
...
count 26399.000000
mean 2.503315
std 0.635689
min 1.000000
25% 2.000000
50% 3.000000
75% 3.000000
max 3.000000
Name: NWLK, dtype: float64
NWRE: Informed of recall (UNEDITED-See "Employment Status Recode" (ESR))
b: N/A (less than 16 years old/at work/not on layoff)
1: Yes
2: No
...
count 26399.000000
mean 2.903974
std 0.315014
min 1.000000
25% 3.000000
50% 3.000000
75% 3.000000
max 3.000000
Name: NWRE, dtype: float64
OIP: All other income past 12 months
['Note: Use values from ADJINC to adjust OIP to constant dollars.']
bbbbbb: N/A (less than 15 years old)
000000: None
000001..999999: $1 to $999999 (Rounded and top-coded)
...
count 26658.000000
mean 675.345037
std 4722.241622
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 83000.000000
Name: OIP, dtype: float64
PAP: Public assistance income past 12 months
['Note: Use values from ADJINC to adjust PAP to constant dollars.']
bbbbb: N/A (less than 15 years old)
00000: None
00001..99999: $1 to $99999 (Rounded)
...
count 26658.000000
mean 76.790832
std 692.300350
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 25400.000000
Name: PAP, dtype: float64
RELP: Relationship
00: Reference person
01: Husband/wife
02: Biological son or daughter
...
count 30559.000000
mean 3.656795
std 5.569053
min 0.000000
25% 0.000000
50% 1.000000
75% 5.000000
max 17.000000
Name: RELP, dtype: float64
RETP: Retirement income past 12 months
['Note: Use values from ADJINC to adjust RETP to constant dollars.']
bbbbbb: N/A (less than 15 years old)
000000: None
000001..999999: $1 to $999999 (Rounded and top-coded)
...
count 26658.000000
mean 3493.095881
std 15552.960973
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 249000.000000
Name: RETP, dtype: float64
SCH: School enrollment
b: N/A (less than 3 years old)
1: No, has not attended in the last 3 months
2: Yes, public school or public college
...
count 29645.000000
mean 1.376050
std 0.691228
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 3.000000
Name: SCH, dtype: float64
SCHG: Grade level attending
bb: N/A (not attending school)
01: Nursery school/preschool
02: Kindergarten
...
count 7544.00000
mean 11.28526
std 5.07951
min 1.00000
25% 7.00000
50% 14.00000
75% 15.00000
max 16.00000
Name: SCHG, dtype: float64
SCHL: Educational attainment
bb: N/A (less than 3 years old)
01: No schooling completed
02: Nursery school, preschool
...
count 29645.000000
mean 17.468173
std 5.585758
min 1.000000
25% 16.000000
50% 19.000000
75% 21.000000
max 24.000000
Name: SCHL, dtype: float64
SEMP: Self-employment income past 12 months (signed)
['Note: Use values from ADJINC to adjust SEMP to constant dollars.']
bbbbbb: N/A (less than 15 years old)
000000: None
-10000..-00001: Loss $1 to $10000 (Rounded and bottom-coded)
...
count 26658.000000
mean 2956.785243
std 30447.719592
min -9100.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 727000.000000
Name: SEMP, dtype: float64
SEX: Sex
1: Male
2: Female
count 30559.000000
mean 1.537878
std 0.498571
min 1.000000
25% 1.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: SEX, dtype: float64
SSIP: Supplementary Security Income past 12 months
['Note: Use values from ADJINC to adjust SSIP to constant dollars.']
bbbbb: N/A (less than 15 years old)
00000: None
00001..99999: $1 to $99999 (Rounded)
...
count 26658.000000
mean 296.556381
std 1651.630937
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 30000.000000
Name: SSIP, dtype: float64
SSP: Social Security income past 12 months
['Note: Use values from ADJINC to adjust SSP to constant dollars.']
bbbbb: N/A (less than 15 years old)
00000: None
00001..99999: $1 to $99999 (Rounded)
...
count 26658.000000
mean 1618.331458
std 4844.120790
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 50000.000000
Name: SSP, dtype: float64
WAGP: Wages or salary income past 12 months
['Note: Use values from ADJINC to adjust WAGP to constant dollars.']
bbbbbb: N/A (less than 15 years old)
000000: None
000001..999999: $1 to 999999 (Rounded and top-coded)
...
count 26658.000000
mean 41347.736139
std 69993.911285
min 0.000000
25% 0.000000
50% 15000.000000
75% 60000.000000
max 660000.000000
Name: WAGP, dtype: float64
WKHP: Usual hours worked per week past 12 months
bb: N/A (less than 16 years old/did not work during the past 12 months)
01..98: 1 to 98 usual hours
99: 99 or more usual hours
...
count 17950.000000
mean 39.674485
std 13.046600
min 1.000000
25% 38.000000
50% 40.000000
75% 45.000000
max 99.000000
Name: WKHP, dtype: float64
WKL: When last worked
b: N/A (less than 16 years old)
1: Within the past 12 months
2: 1-5 years ago
...
count 26399.000000
mean 1.550930
std 0.842136
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 3.000000
Name: WKL, dtype: float64
WKW: Weeks worked during past 12 months
b: N/A (less than 16 years old/did not work during the past 12 months)
1: 50 to 52 weeks
2: 48 to 49 weeks
...
count 17950.000000
mean 1.952201
std 1.698859
min 1.000000
25% 1.000000
50% 1.000000
75% 3.000000
max 6.000000
Name: WKW, dtype: float64
WRK: Worked last week
b: N/A (not reported)
1: Worked
2: Did not work
...
count 24046.000000
mean 1.390086
std 0.487779
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 2.000000
Name: WRK, dtype: float64
YOEP05: Year of entry for data collected prior to 2012
bbbb: Not eligible - Born in the US
1919: 1919 or earlier (Bottom-coded)
1920: 1920
...
count 4268.000000
mean 1126.390112
std 990.940648
min -9.000000
25% -9.000000
50% 1970.000000
75% 1997.000000
max 2011.000000
Name: YOEP05, dtype: float64
YOEP12: Year of entry for data collected in 2012 or later
bbbb: Not eligible - Born in the US
1921: 1921 or earlier (Bottom-coded)
1922: 1922 - 1923
...
count 4268.000000
mean 856.887769
std 992.461884
min -9.000000
25% -9.000000
50% -9.000000
75% 1995.000000
max 2013.000000
Name: YOEP12, dtype: float64
ANC: Ancestry recode
1: Single
2: Multiple
3: Unclassified
...
count 30559.000000
mean 1.555810
std 0.943709
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 4.000000
Name: ANC, dtype: float64
ANC1P05: Recoded Detailed Ancestry for data collected prior to 2012 - first entry
001: Alsatian
003: Austrian
005: Basque
...
count 30559.000000
mean 339.916359
std 420.806625
min -9.000000
25% -9.000000
50% 50.000000
75% 902.000000
max 999.000000
Name: ANC1P05, dtype: float64
ANC1P12: Recoded Detailed Ancestry for data collected in 2012 or later - first entry
001: Alsatian
003: Austrian
005: Basque
...
count 30559.000000
mean 251.538041
std 397.645131
min -9.000000
25% -9.000000
50% -9.000000
75% 615.000000
max 999.000000
Name: ANC1P12, dtype: float64
ANC2P05: Recoded Detailed Ancestry for data collected prior to 2012 - second entry
001: Alsatian
003: Austrian
005: Basque
...
count 30559.000000
mean 478.113944
std 494.015425
min -9.000000
25% -9.000000
50% 125.000000
75% 999.000000
max 999.000000
Name: ANC2P05, dtype: float64
ANC2P12: Recoded Detailed Ancestry for data collected in 2012 or later - second entry
001: Alsatian
003: Austrian
005: Basque
...
count 30559.000000
mean 346.208515
std 473.692419
min -9.000000
25% -9.000000
50% -9.000000
75% 999.000000
max 999.000000
Name: ANC2P12, dtype: float64
DECADE: Decade of entry
b: N/A (Born in the US)
1: Before 1950
2: 1950 - 1959
...
count 4268.000000
mean 5.698454
std 1.477502
min 1.000000
25% 5.000000
50% 6.000000
75% 7.000000
max 7.000000
Name: DECADE, dtype: float64
DIS: Disability recode
1: With a disability
2: Without a disability
count 30559.000000
mean 1.871527
std 0.334621
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: DIS, dtype: float64
DRIVESP: Number of vehicles calculated from JWRI
b: N/A (Nonworker or worker who does not drive to work)
1: 1.000 vehicles (Drove alone)
2: 0.500 vehicles (In a 2-person carpool)
...
count 6211.000000
mean 1.217517
std 0.608995
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 6.000000
Name: DRIVESP, dtype: float64
ESP: Employment status of parents
b: N/A (not own child of householder, and not child in subfamily) Living with two parents:
1: Both parents in labor force
2: Father only in labor force
...
count 4318.000000
mean 4.348541
std 2.889376
min 1.000000
25% 1.000000
50% 5.000000
75% 7.000000
max 8.000000
Name: ESP, dtype: float64
ESR: Employment status recode
b: N/A (less than 16 years old)
1: Civilian employed, at work
2: Civilian employed, with a job but not at work
...
count 26399.000000
mean 2.880071
std 2.322380
min 1.000000
25% 1.000000
50% 1.000000
75% 6.000000
max 6.000000
Name: ESR, dtype: float64
FOD1P: Recoded field of degree - first entry
bbbb: N/A (less than bachelor's degree)
1100: GENERAL AGRICULTURE
1101: AGRICULTURE PRODUCTION AND MANAGEMENT
...
count 12871.000000
mean 4518.419470
std 1596.571638
min 1100.000000
25% 3301.000000
50% 5404.000000
75% 5507.000000
max 6403.000000
Name: FOD1P, dtype: float64
FOD2P: Recoded field of degree - second entry
bbbb: N/A (less than bachelor's degree)
1100: GENERAL AGRICULTURE
1101: AGRICULTURE PRODUCTION AND MANAGEMENT
...
count 2230.000000
mean 4427.388789
std 1610.279474
min 1101.000000
25% 2602.000000
50% 5200.000000
75% 5506.000000
max 6403.000000
Name: FOD2P, dtype: float64
HICOV: Health insurance coverage recode
1: With health insurance coverage
2: No health insurance coverage
count 30559.00000
mean 1.05815
std 0.23403
min 1.00000
25% 1.00000
50% 1.00000
75% 1.00000
max 2.00000
Name: HICOV, dtype: float64
HISP: Recoded detailed Hispanic origin
01: Not Spanish/Hispanic/Latino
02: Mexican
03: Puerto Rican
...
count 30559.000000
mean 1.666808
std 2.950180
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 24.000000
Name: HISP, dtype: float64
INDP: Industry recode based on 2012 IND codes
['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on industry groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']
bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
0170: AGR-CROP PRODUCTION
0180: AGR-ANIMAL PRODUCTION AND AQUACULTURE
...
count 20557.000000
mean 7685.842584
std 1845.245220
min 170.000000
25% 7270.000000
50% 7870.000000
75% 9160.000000
max 9920.000000
Name: INDP, dtype: float64
JWAP: Time of arrival at work - hour and minute
bbb: N/A (not a worker; worker who worked at home)
001: 12:00 a.m. to 12:04 a.m.
002: 12:05 a.m. to 12:09 a.m.
...
count 14545.000000
mean 106.071640
std 33.565316
min 1.000000
25% 91.000000
50% 100.000000
75% 109.000000
max 284.000000
Name: JWAP, dtype: float64
JWDP: Time of departure for work - hour and minute
bbb: N/A (not a worker; worker who worked at home)
001: 12:00 a.m. to 12:29 a.m.
002: 12:30 a.m. to 12:59 a.m.
...
count 14545.000000
mean 57.893022
std 22.927208
min 1.000000
25% 43.000000
50% 55.000000
75% 64.000000
max 150.000000
Name: JWDP, dtype: float64
LANP05: Language spoken at home for data collected prior to 2012
bbb: N/A (less than 5 years old/speaks only English)
601: Jamaican Creole
607: German
...
count 4231.000000
mean 362.528717
std 336.415892
min -9.000000
25% -9.000000
50% 620.000000
75% 625.000000
max 994.000000
Name: LANP05, dtype: float64
LANP12: Language spoken at home for data collected in 2012 or later
bbb: N/A (less than 5 years old/speaks only English)
601: Jamaican Creole
602: Krio
...
count 4231.000000
mean 290.464429
std 337.743570
min -9.000000
25% -9.000000
50% -9.000000
75% 625.000000
max 994.000000
Name: LANP12, dtype: float64
MIGPUMA00: Migration PUMA based on Census 2000 definition for data collected prior to 2012
bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago)
00001: Did not live in the United States or in Puerto Rico one year ago
00002: Lived in Puerto Rico one year ago and current residence is in the U.S.
...
count 5832.000000
mean 465.729767
std 1096.576145
min -9.000000
25% -9.000000
50% 100.000000
75% 100.000000
max 8100.000000
Name: MIGPUMA00, dtype: float64
MIGPUMA10: Migration PUMA based on 2010 Census definition for data collected in 2012 or later
bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago)
00001: Did not live in the United States or in Puerto Rico one year ago
00002: Lived in Puerto Rico one year ago and current residence is in the U.S.
...
count 5832.000000
mean 1528.504630
std 7819.009818
min -9.000000
25% -9.000000
50% -9.000000
75% 100.000000
max 59300.000000
Name: MIGPUMA10, dtype: float64
MIGSP05: Migration recode for data collected prior to 2012 - State or foreign country code
bbb: N/A (person less than 1 year old/lived in same house 1 year ago)
001: Alabama/AL
002: Alaska/AK
...
count 5832.000000
mean 16.364026
std 57.178932
min -9.000000
25% -9.000000
50% 11.000000
75% 12.000000
max 554.000000
Name: MIGSP05, dtype: float64
MIGSP12: Migration recode for data collected in 2012 or later - State or foreign country code
bbb: N/A (person less than 1 year old/lived in same house 1 year ago)
001: Alabama/AL
002: Alaska/AK
...
count 5832.000000
mean 11.033951
std 50.483716
min -9.000000
25% -9.000000
50% -9.000000
75% 11.000000
max 555.000000
Name: MIGSP12, dtype: float64
MSP: Married, spouse present/spouse absent
b: N/A (age less than 15 years)
1: Now married, spouse present
2: Now married, spouse absent
...
count 26658.000000
mean 4.202491
std 2.152624
min 1.000000
25% 1.000000
50% 6.000000
75% 6.000000
max 6.000000
Name: MSP, dtype: float64
NAICSP: NAICS Industry code based on 2012 NAICS codes
['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on NAICS groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']
bbbbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
111 : AGR-CROP PRODUCTION
112 : AGR-ANIMAL PRODUCTION AND AQUACULTURE
...
count 20557
unique 223
top 722Z
freq 993
Name: NAICSP, dtype: object
NATIVITY: Nativity
1: Native
2: Foreign born
count 30559.000000
mean 1.123237
std 0.328714
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: NATIVITY, dtype: float64
NOP: Nativity of parent
b: N/A (greater than 17 years old/not an own child of householder, and not child in subfamily)
1: Living with two parents: Both parents NATIVE
2: Living with two parents: Father only FOREIGN BORN
...
count 4312.000000
mean 4.474954
std 2.696136
min 1.000000
25% 1.000000
50% 5.000000
75% 7.000000
max 8.000000
Name: NOP, dtype: float64
OC: Own child
0: No (includes GQ)
1: Yes
count 30559.000000
mean 0.124775
std 0.330469
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: OC, dtype: float64
OCCP02: Occupation recode for data collected in 2009 based on 2002 OCC codes
bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS
0020: MGR-GENERAL AND OPERATIONS MANAGERS
...
count 20557
unique 314
top N.A.
freq 16866
Name: OCCP02, dtype: object
OCCP10: Occupation recode for data collected in 2010 and 2011 based on 2010 OCC codes
bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS
0020: MGR-GENERAL AND OPERATIONS MANAGERS
...
count 20557
unique 370
top N.A.
freq 12485
Name: OCCP10, dtype: object
OCCP12: Occupation recode for data collected in 2012 or later based on 2010 OCC codes
['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']
bbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS
0020: MGR-GENERAL AND OPERATIONS MANAGERS
...
count 20557
unique 357
top N.A.
freq 11763
Name: OCCP12, dtype: object
PAOC: Presence and age of own children
b: N/A (male/female under 16 years old/GQ)
1: Females with own children under 6 years only
2: Females with own children 6 to 17 years only
...
count 13083.000000
mean 3.616372
std 0.875946
min 1.000000
25% 4.000000
50% 4.000000
75% 4.000000
max 4.000000
Name: PAOC, dtype: float64
PERNP: Total person's earnings
['Note: Use values from ADJINC to adjust PERNP to constant dollars.']
bbbbbbb: N/A (less than 15 years old)
0000000: No earnings
-010000: Loss of $10000 or more (Rounded & bottom-coded components)
...
count 26399.000000
mean 44739.192053
std 77239.316348
min -9100.000000
25% 0.000000
50% 20000.000000
75% 62000.000000
max 1360000.000000
Name: PERNP, dtype: float64
PINCP: Total person's income (signed)
['Note: Use values from ADJINC to adjust PINCP to constant dollars.']
bbbbbbb: N/A (less than 15 years old)
0000000: None
-019999: Loss of $19999 or more (Rounded & bottom-coded components)
...
count 26658.000000
mean 53262.965339
std 82561.741382
min -13600.000000
25% 7200.000000
50% 30000.000000
75% 70000.000000
max 1471000.000000
Name: PINCP, dtype: float64
POBP05: Place of birth (Recode) for data collected prior to 2012
001: Alabama/AL
002: Alaska/AK
004: Arizona/AZ
...
count 30559.000000
mean 29.560948
std 80.011666
min -9.000000
25% -9.000000
50% 11.000000
75% 31.000000
max 554.000000
Name: POBP05, dtype: float64
POBP12: Place of birth (Recode) for data collected in 2012 or later
001: Alabama/AL
002: Alaska/AK
004: Arizona/AZ
...
count 30559.000000
mean 19.925554
std 72.939454
min -9.000000
25% -9.000000
50% -9.000000
75% 12.000000
max 515.000000
Name: POBP12, dtype: float64
POVPIP: Income-to-poverty ratio recode
bbb: N/A
000..500: Below 501 percent
501: 501 percent or more
...
count 28378.000000
mean 331.494045
std 181.116198
min 0.000000
25% 158.000000
50% 398.000000
75% 501.000000
max 501.000000
Name: POVPIP, dtype: float64
POWPUMA00: Place of work PUMA based on Census 2000 definition for data collected prior to 2012
bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work)
00001: Did not work in the United States or in Puerto Rico
00100..08200: Assigned Place of work PUMA. Use with POWSP05.
...
count 15327.000000
mean 144.907810
std 345.593937
min -9.000000
25% -9.000000
50% 100.000000
75% 100.000000
max 6890.000000
Name: POWPUMA00, dtype: float64
POWPUMA10: Place of work PUMA based on 2010 Census definition for data collected in 2012 or later
bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work)
00001: Did not work in the United States or in Puerto Rico
00100..70100: Assigned Place of work PUMA. Use with POWSP12.
...
count 15327.000000
mean 1563.770470
std 8888.575098
min -9.000000
25% -9.000000
50% -9.000000
75% 100.000000
max 59300.000000
Name: POWPUMA10, dtype: float64
POWSP05: Place of work for data collected prior to 2012 - State or foreign country recode
bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work)
001: Alabama/AL
002: Alaska/AK
...
count 15327.000000
mean 6.471978
std 18.985725
min -9.000000
25% -9.000000
50% 11.000000
75% 11.000000
max 555.000000
Name: POWSP05, dtype: float64
POWSP12: Place of work for data collected in 2012 or later - State or foreign country recode
bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work)
001: Alabama/AL
002: Alaska/AK
...
count 15327.000000
mean 2.357539
std 17.856144
min -9.000000
25% -9.000000
50% -9.000000
75% 11.000000
max 555.000000
Name: POWSP12, dtype: float64
PRIVCOV: Private health insurance coverage recode
1: With private health insurance coverage
2: Without private health insurance coverage
count 30559.000000
mean 1.283877
std 0.450885
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 2.000000
Name: PRIVCOV, dtype: float64
PUBCOV: Public health coverage recode
1: With public health coverage
2: Without public health coverage
count 30559.000000
mean 1.651461
std 0.476515
min 1.000000
25% 1.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: PUBCOV, dtype: float64
QTRBIR: Quarter of birth
1: January through March
2: April through June
3: July through September
...
count 30559.000000
mean 2.520665
std 1.111741
min 1.000000
25% 2.000000
50% 3.000000
75% 4.000000
max 4.000000
Name: QTRBIR, dtype: float64
RAC1P: Recoded detailed race code
1: White alone
2: Black or African American alone
3: American Indian alone
...
count 30559.000000
mean 2.025132
std 1.739391
min 1.000000
25% 1.000000
50% 2.000000
75% 2.000000
max 9.000000
Name: RAC1P, dtype: float64
RAC2P05: Recoded detailed race code for data collected prior to 2012
01: White alone
02: Black or African American alone
03: Apache alone
...
count 30559.000000
mean -0.339834
std 13.838677
min -9.000000
25% -9.000000
50% 1.000000
75% 2.000000
max 67.000000
Name: RAC2P05, dtype: float64
RAC2P12: Recoded detailed race code for data collected in 2012 or later
01: White alone
02: Black or African American alone
03: Apache alone
...
count 30559.000000
mean -2.417291
std 13.208816
min -9.000000
25% -9.000000
50% -9.000000
75% 1.000000
max 68.000000
Name: RAC2P12, dtype: float64
RAC3P05: Recoded detailed race code for data collected prior to 2012
01: Some other race alone
02: Other Pacific Islander alone
03: Samoan alone
...
count 30559.000000
mean 27.004778
std 33.030433
min -9.000000
25% -9.000000
50% 44.000000
75% 68.000000
max 72.000000
Name: RAC3P05, dtype: float64
RAC3P12: Recoded detailed race code for data collected in 2012 or later
001: White alone
002: Black or African American alone
003: American Indian and Alaska Native alone
...
count 30559.000000
mean -4.015347
std 7.034421
min -9.000000
25% -9.000000
50% -9.000000
75% 1.000000
max 95.000000
Name: RAC3P12, dtype: float64
RACAIAN: American Indian and Alaska Native recode (American Indian and Alaska Native alone or in combination with one or more other races)
0: No
1: Yes
count 30559.000000
mean 0.009490
std 0.096954
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: RACAIAN, dtype: float64
RACASN: Asian recode (Asian alone or in combination with one or more other races)
0: No
1: Yes
count 30559.000000
mean 0.044308
std 0.205781
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: RACASN, dtype: float64
RACBLK: Black or African American recode (Black alone or in combination with one or more other races)
0: No
1: Yes
count 30559.000000
mean 0.497464
std 0.500002
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 1.000000
Name: RACBLK, dtype: float64
RACNHPI: Native Hawaiian and Other Pacific Islander recode (Native Hawaiian and Other Pacific Islander alone or in combination with one or more other races)
0: No
1: Yes
count 30559.000000
mean 0.001080
std 0.032844
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: RACNHPI, dtype: float64
RACNUM: Number of major race groups represented
1..6: Race groups
count 30559.000000
mean 1.027291
std 0.185123
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 4.000000
Name: RACNUM, dtype: float64
RACSOR: Some other race recode (Some other race alone or in combination with one or more other races)
0: No
1: Yes
count 30559.000000
mean 0.027750
std 0.164257
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: RACSOR, dtype: float64
RACWHT: White recode (White alone or in combination with one or more other races)
0: No
1: Yes
count 30559.000000
mean 0.447200
std 0.497213
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 1.000000
Name: RACWHT, dtype: float64
RC: Related child
0: No (includes GQ)
1: Yes
count 30559.000000
mean 0.149939
std 0.357018
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: RC, dtype: float64
SCIENGP: Field of degree science and engineering flag - NSF definition
b: N/A (less than a bachelor's degree)
1: Yes
2: No
...
count 12871.000000
mean 1.479605
std 0.499603
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 2.000000
Name: SCIENGP, dtype: float64
SCIENGRLP: Field of degree science and engineering related flag - NSF definition
b: N/A (less than a bachelor's degree)
1: Yes
2: No
...
count 12871.000000
mean 1.951674
std 0.214462
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: SCIENGRLP, dtype: float64
SFN: Subfamily number
b: N/A (GQ/not in a subfamily)
1: In subfamily 1
2: In subfamily 2
...
count 923
mean 1
std 0
min 1
25% 1
50% 1
75% 1
max 1
Name: SFN, dtype: float64
SFR: Subfamily relationship
b: N/A (GQ/not in a subfamily)
1: Husband/wife no children
2: Husband/wife with children
...
count 923.000000
mean 3.963164
std 1.375627
min 1.000000
25% 3.000000
50% 5.000000
75% 5.000000
max 6.000000
Name: SFR, dtype: float64
SOCP00: SOC Occupation code for data collected in 2009 based on 2000 SOC codes
bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
111021: MGR-GENERAL AND OPERATIONS MANAGERS
1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS *
...
count 20557
unique 314
top N.A.//
freq 16866
Name: SOCP00, dtype: object
SOCP10: SOC Occupation code for data collected in 2010 and 2011 based on 2010 SOC codes
bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS*
111021: MGR-GENERAL AND OPERATIONS MANAGERS
...
count 20557
unique 370
top N.A.//
freq 12485
Name: SOCP10, dtype: object
SOCP12: SOC Occupation recode for data collected in 2012 or later based on 2010 SOC codes
['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']
bbbbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS *
111021: MGR-GENERAL AND OPERATIONS MANAGERS
...
count 20557
unique 357
top N.A.//
freq 11763
Name: SOCP12, dtype: object
VPS: Veteran period of service
bb: N/A (less than 17 years old, no active duty) War Times:
01: Gulf War: 9/2001 or later
02: Gulf War: 9/2001 or later and Gulf War: 8/1990 - 8/2001
...
count 1855.000000
mean 6.997844
std 4.099096
min 1.000000
25% 4.000000
50% 6.000000
75% 11.000000
max 15.000000
Name: VPS, dtype: float64
WAOB: World area of birth ****
1: US state (POB = 001-059)
2: PR and US Island Areas (POB = 060-099)
3: Latin America (POB = 303,310-399)
...
count 30559.000000
mean 1.457999
std 1.235133
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 8.000000
Name: WAOB, dtype: float64
num columns described = 145
HOUSING RECORD
insp: Column not in data dictionary.
count 6561.000000
mean 999.282731
std 1085.174484
min 0.000000
25% 370.000000
50% 790.000000
75% 1200.000000
max 8600.000000
Name: insp, dtype: float64
RT: Record Type
H: Housing Record or Group Quarters Unit
count 17500
unique 1
top H
freq 17500
Name: RT, dtype: object
SERIALNO: Housing unit/GQ person serial number
2009000000001..2013999999999: Unique identifier
count 1.750000e+04
mean 2.011068e+12
std 1.401911e+09
min 2.009000e+12
25% 2.010000e+12
50% 2.011001e+12
75% 2.012001e+12
max 2.013001e+12
Name: SERIALNO, dtype: float64
DIVISION: Division code
0: Puerto Rico
1: New England (Northeast region)
2: Middle Atlantic (Northeast region)
...
count 17500
mean 5
std 0
min 5
25% 5
50% 5
75% 5
max 5
Name: DIVISION, dtype: float64
PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10.
['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']
00100..08200: Public use microdata area codes
77777: Combination of 01801, 01802, and 01905 in Louisiana
-0009: Code classification is Not Applicable because data collected in 2012 or later
...
count 17500.000000
mean 56.427371
std 55.291036
min -9.000000
25% -9.000000
50% 101.000000
75% 103.000000
max 105.000000
Name: PUMA00, dtype: float64
PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data collected in 2012 or later. Use in combination with PUMA00.
['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']
00100..70301: Public use microdata area codes
-0009: Code classification is Not Applicable because data collected prior to 2012
count 17500.000000
mean 37.764171
std 55.358495
min -9.000000
25% -9.000000
50% -9.000000
75% 103.000000
max 105.000000
Name: PUMA10, dtype: float64
REGION: Region code
1: Northeast
2: Midwest
3: South
...
count 17500
mean 3
std 0
min 3
25% 3
50% 3
75% 3
max 3
Name: REGION, dtype: float64
ST: State Code
01: Alabama/AL
02: Alaska/AK
04: Arizona/AZ
...
count 17500
mean 11
std 0
min 11
25% 11
50% 11
75% 11
max 11
Name: ST, dtype: float64
ADJHSG: Adjustment factor for housing dollar amounts (6 implied decimal places)
['Note: The values of ADJHSG inflation-adjusts reported housing costs to 2013 dollars and applies to variables CONP, ELEP, FULP, GASP, GRNTP, INSP, MHP, MRGP, SMOCP, RNTP, SMP, and WATP in the housing record. ADJHSG does not apply to AGS or TAXP because they are categorical variables that should not be inflation-adjusted.']
1086032: 2009 factor
1068395: 2010 factor
1035725: 2011 factor
...
count 17500.000000
mean 1039364.231657
std 31877.254257
min 1000000.000000
25% 1014531.000000
50% 1035725.000000
75% 1068395.000000
max 1086032.000000
Name: ADJHSG, dtype: float64
ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places)
['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars. ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.']
1085467: 2009 factor (0.999480 * 1.08603175)
1076540: 2010 factor (1.007624 * 1.06839475)
1054614: 2011 factor (1.018237 * 1.03572510)
...
count 17500.000000
mean 1048478.770229
std 29598.269890
min 1007549.000000
25% 1024887.000000
50% 1054614.000000
75% 1076540.000000
max 1085467.000000
Name: ADJINC, dtype: float64
WGTP: Housing Weight
0000: Group Quarter placeholder record
00001..09999: Integer weight of housing unit
count 17500.000000
mean 17.047257
std 13.878535
min 0.000000
25% 10.000000
50% 14.000000
75% 22.000000
max 172.000000
Name: WGTP, dtype: float64
NP: Number of person records following this housing record
00: Vacant unit
01: One person record (one person in household or any person in group quarters)
02..20: Number of person records (number of persons in household)
...
count 17500.000000
mean 1.746229
std 1.291371
min 0.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 13.000000
Name: NP, dtype: float64
TYPE: Type of unit
1: Housing unit
2: Institutional group quarters
3: Noninstitutional group quarters
...
count 17500.000000
mean 1.268514
std 0.655686
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 3.000000
Name: TYPE, dtype: float64
ACR: Lot size
b: N/A (GQ/not a one-family house or mobile home)
1: House on less than one acre
2: House on one to less than ten acres
...
count 6388.000000
mean 1.028961
std 0.182034
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 3.000000
Name: ACR, dtype: float64
AGS: Sales of Agriculture Products (Yearly sales)
['Note: No adjustment factor is applied to AGS.']
b: N/A (GQ/vacant/not a one-family house or mobile home/less than 1 acre)
1: None
2: $ 1 - $ 999
...
count 164.000000
mean 1.201220
std 0.934544
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 6.000000
Name: AGS, dtype: float64
BATH: Bathtub or shower
b: N/A (GQ)
1: Yes
2: No
...
count 14844.000000
mean 1.005322
std 0.072760
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: BATH, dtype: float64
BDSP: Number of bedrooms
bb : N/A (GQ)
00..99: 0 to 99 bedrooms (Top-coded)
count 14844.000000
mean 2.171989
std 1.452519
min 0.000000
25% 1.000000
50% 2.000000
75% 3.000000
max 14.000000
Name: BDSP, dtype: float64
BLD: Units in structure
bb: N/A (GQ)
01: Mobile home or trailer
02: One-family house detached
...
count 14844.000000
mean 5.365131
std 2.671220
min 2.000000
25% 3.000000
50% 5.000000
75% 8.000000
max 9.000000
Name: BLD, dtype: float64
BUS: Business or medical office on property
b: N/A (GQ/not a one-family house or mobile home)
1: Yes
2: No
...
count 6388.000000
mean 1.986381
std 0.115913
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: BUS, dtype: float64
CONP: Condo fee (monthly amount)
['Note: Use values from ADJHSG to adjust CONP to constant dollars.']
bbbb: N/A (GQ/vacant/not owned or being bought)
0000: Not condo
0001..9999: $1 - $9999 (Rounded and top-coded)
...
count 14844.000000
mean 51.061035
std 175.491301
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1700.000000
Name: CONP, dtype: float64
ELEP: Electricity (monthly cost)
['Note: Use values from ADJHSG to adjust ELEP values 3 and over to constant dollars.']
bbb: N/A (GQ/vacant)
001: Included in rent or in condo fee
002: No charge or electricity not used
...
count 13737.000000
mean 92.849967
std 94.873654
min 1.000000
25% 30.000000
50% 70.000000
75% 130.000000
max 570.000000
Name: ELEP, dtype: float64
FS: Yearly food stamp/Supplemental Nutrition Assistance Program recipiency
b: N/A (vacant)
1: Yes
2: No
...
count 16393.000000
mean 1.874093
std 0.331755
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: FS, dtype: float64
FULP: Fuel cost(yearly cost for fuels other than gas and electricity)
['Note: Use values from ADJHSG to adjust FULP values 3 and over to constant dollars.']
bbbb: N/A (GQ/vacant)
0001: Included in rent or in condo fee
0002: No charge or these fuels not used
...
count 13737.000000
mean 45.400306
std 311.391053
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 5200.000000
Name: FULP, dtype: float64
GASP: Gas (monthly cost)
['Note: Use values from ADJHSG to adjust GASP values 4 and over to constant dollars.']
bbb: N/A (GQ/vacant)
001: Included in rent or in condo fee
002: Included in electricity payment
...
count 13737.000000
mean 56.089758
std 88.354993
min 1.000000
25% 3.000000
50% 10.000000
75% 80.000000
max 580.000000
Name: GASP, dtype: float64
HFL: House heating fuel
b: N/A (GQ/vacant)
1: Utility gas
2: Bottled, tank, or LP gas
...
count 13737.000000
mean 1.878867
std 1.306775
min 1.000000
25% 1.000000
50% 1.000000
75% 3.000000
max 9.000000
Name: HFL, dtype: float64
MHP: Mobile home costs (yearly amount)
['Note: Use values from ADJHSG to adjust MHP to constant dollars.']
bbbbb: N/A (GQ/vacant/not owned or being bought/ not mobile home)
00000: None
00001..99999: $1 to $99999 (Rounded and top-coded)
...
count 0
mean NaN
std NaN
min NaN
25% NaN
50% NaN
75% NaN
max NaN
Name: MHP, dtype: float64
MRGI: First mortgage payment includes fire/hazard/flood insurance
b: N/A (GQ/vacant/not owned or being bought/not mortgaged)
1: Yes, insurance included in payment
2: No, insurance paid separately or no insurance
...
count 4971.000000
mean 1.446590
std 0.497189
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 2.000000
Name: MRGI, dtype: float64
MRGP: First mortgage payment (monthly amount)
['Note: Use values from ADJHSG to adjust MRGP to constant dollars.']
bbbbb: N/A (GQ/vacant/not owned or being bought/not mortgaged)
00001..99999: $1 to $99999 (Rounded and top-coded)
count 4971.000000
mean 1936.767250
std 1244.418262
min 10.000000
25% 1000.000000
50% 1700.000000
75% 2500.000000
max 8000.000000
Name: MRGP, dtype: float64
MRGT: First mortgage payment includes real estate taxes
b: N/A (GQ/vacant/not owned or being bought/not mortgaged)
1: Yes, taxes included in payment
2: No, taxes paid separately or taxes not required
...
count 4971.000000
mean 1.281633
std 0.449841
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 2.000000
Name: MRGT, dtype: float64
MRGX: First mortgage status
b: N/A (GQ/vacant/not owned or being bought)
1: Mortgage, deed of trust, or similar debt
2: Contract to purchase
...
count 6561.000000
mean 1.490626
std 0.857150
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 3.000000
Name: MRGX, dtype: float64
REFR: Refrigerator
b: N/A (GQ)
1: Yes
2: No
...
count 14844.000000
mean 1.007949
std 0.088807
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: REFR, dtype: float64
RMSP: Number of Rooms
bb: N/A (GQ)
00..99: Rooms (Top-coded)
count 14844.000000
mean 5.139854
std 2.895583
min 1.000000
25% 3.000000
50% 4.000000
75% 7.000000
max 24.000000
Name: RMSP, dtype: float64
RNTM: Meals included in rent
b: N/A (GQ/not a rental unit/occupied without rent payment)
1: Yes
2: No
...
count 7373.000000
mean 1.987658
std 0.110416
min 1.000000
25% 2.000000
50% 2.000000
75% 2.000000
max 2.000000
Name: RNTM, dtype: float64
RNTP: Monthly rent
['Note: Use values from ADJHSG to adjust RNTP to constant dollars.']
bbbbb: N/A (GQ/not a rental unit/occupied without rent payment)
00001..99999: $1 to $99999 (Rounded and top-coded)
count 7373.000000
mean 1246.782856
std 769.088231
min 4.000000
25% 730.000000
50% 1100.000000
75% 1700.000000
max 3900.000000
Name: RNTP, dtype: float64
RWAT: Hot and cold running water
b: N/A (GQ)
1: Yes
2: No
...
count 14844.000000
mean 1.007343
std 0.085379
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: RWAT, dtype: float64
RWATPR: Running water
b: N/A (GQ)
1: Yes
2: No
...
count 14844
mean 9
std 0
min 9
25% 9
50% 9
75% 9
max 9
Name: RWATPR, dtype: float64
SINK: Sink with a faucet
b: N/A (GQ)
1: Yes
2: No
...
count 14844.000000
mean 1.005457
std 0.073670
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: SINK, dtype: float64
SMP: Total payment on all second and junior mortgages and home equity loans (monthly amount)
['Note: Use ADJHSG to adjust SMP to constant dollars.']
bbbbb: N/A (GQ/vacant/not owned or being bought/ no second or junior mortgages or home equity loans)
00001..99999: $1 to $99999 (Rounded and top-coded)
count 1228.000000
mean 506.485342
std 569.513130
min 4.000000
25% 170.000000
50% 350.000000
75% 600.000000
max 4100.000000
Name: SMP, dtype: float64
STOV: Stove or range
b: N/A (GQ)
1: Yes
2: No
...
count 14844.000000
mean 1.010105
std 0.100018
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: STOV, dtype: float64
TEL: Telephone
['NOTE: Problems in the collection of data on the availability of telephone service (TEL) in 2012 led to suppressing this variable in six PUMAs in Georgia. This only affects 2012 vintage data. See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using TEL. http://www.census.gov/acs/www/data_documentation/pums_documentation/']
b: N/A (GQ/vacant)
1: Yes
2: No
...
count 13737.000000
mean 1.029701
std 0.169767
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: TEL, dtype: float64
TEN: Tenure
b: N/A (GQ/vacant)
1: Owned with mortgage or loan (include home equity loans)
2: Owned free and clear
...
count 13737.000000
mean 2.174128
std 0.945958
min 1.000000
25% 1.000000
50% 3.000000
75% 3.000000
max 4.000000
Name: TEN, dtype: float64
TOIL: Flush toilet
b: N/A (GQ)
1: Yes
2: No
...
count 14844.000000
mean 1.005591
std 0.074569
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: TOIL, dtype: float64
VACS: Vacancy status
b: N/A (GQ/occupied)
1: For rent
2: Rented, not occupied
...
count 1107.000000
mean 4.214995
std 2.565742
min 1.000000
25% 1.000000
50% 4.000000
75% 7.000000
max 7.000000
Name: VACS, dtype: float64
VALP: Property value
bbbbbbb: N/A (GQ/vacant units, except "for-sale-only" and "sold, not occupied"/not owned or being bought)
0000000: $0 (applies to 2009 and 2010 only)
0000001..9999999: $1 to $9999999 (Rounded and top-coded)
...
count 6741.000000
mean 576821.155615
std 578742.887940
min 180.000000
25% 290000.000000
50% 410000.000000
75% 700000.000000
max 5303000.000000
Name: VALP, dtype: float64
VEH: Vehicles (1 ton or less) available
b: N/A (GQ/vacant)
0: No vehicles
1: 1 vehicle
...
count 13737.000000
mean 0.925311
std 0.869523
min 0.000000
25% 0.000000
50% 1.000000
75% 1.000000
max 6.000000
Name: VEH, dtype: float64
WATP: Water (yearly cost)
['Note: Use values from ADJHSG to adjust WATP values 3 and over to constant dollars.']
bbbb: N/A (GQ/vacant)
0001: Included in rent or in condo fee
0002: No charge
...
count 13737.000000
mean 239.380724
std 412.715268
min 1.000000
25% 1.000000
50% 2.000000
75% 390.000000
max 3900.000000
Name: WATP, dtype: float64
YBL: When structure first built
bb: N/A (GQ)
01: 1939 or earlier
02: 1940 to 1949
...
count 14844.000000
mean 3.239289
std 2.768117
min 1.000000
25% 1.000000
50% 2.000000
75% 4.000000
max 16.000000
Name: YBL, dtype: float64
FES: Family type and employment status
b: N/A (GQ/vacant/not a family)
1: Married-couple family: Husband and wife in LF
2: Married-couple family: Husband in labor force, wife not in LF
...
count 5929.000000
mean 3.998988
std 2.787092
min 1.000000
25% 1.000000
50% 4.000000
75% 7.000000
max 8.000000
Name: FES, dtype: float64
FINCP: Family income (past 12 months)
['Note: Use values from ADJINC to adjust FINCP to constant dollars.']
bbbbbbbb: N/A (GQ/vacant)
00000000: No family income
-0059999: Loss of -$59,999 or more
...
count 5953.000000
mean 130596.028725
std 154445.776061
min 0.000000
25% 36000.000000
50% 84500.000000
75% 166000.000000
max 2087000.000000
Name: FINCP, dtype: float64
FPARC: Family presence and age of related children
b: N/A (GQ/vacant/not a family)
1: With related children under 5 years only
2: With related children 5 to 17 years only
...
count 5953.000000
mean 3.076432
std 1.124643
min 1.000000
25% 2.000000
50% 4.000000
75% 4.000000
max 4.000000
Name: FPARC, dtype: float64
GRNTP: Gross rent (monthly amount)
['Note: Use values from ADJHSG to adjust GRNTP to constant dollars.']
bbbbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment)
00001..99999: $1 - $99999 (Components are rounded)
count 6989.000000
mean 1349.524109
std 791.201141
min 4.000000
25% 819.000000
50% 1200.000000
75% 1800.000000
max 4510.000000
Name: GRNTP, dtype: float64
GRPIP: Gross rent as a percentage of household income past 12 months
bbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment/no household income)
001..100: 1% to 100%
101: 101% or more
...
count 6805.000000
mean 39.897575
std 28.575415
min 1.000000
25% 20.000000
50% 29.000000
75% 51.000000
max 101.000000
Name: GRPIP, dtype: float64
HHL: Household language
b: N/A (GQ/vacant)
1: English only
2: Spanish
...
count 13737.000000
mean 1.336245
std 0.850583
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 5.000000
Name: HHL, dtype: float64
HHT: Household/family type
b: N/A (GQ/vacant)
1: Married couple household
2: Other family household:Male householder, no wife present
...
count 13737.000000
mean 3.805198
std 2.036160
min 1.000000
25% 2.000000
50% 4.000000
75% 6.000000
max 7.000000
Name: HHT, dtype: float64
HINCP: Household income (past 12 months)
['Note: Use values from ADJINC to adjust HINCP to constant dollars.']
bbbbbbbb: N/A(GQ/vacant)
00000000: No household income
-0059999: Loss of -$59,999 or more
...
count 13737.000000
mean 102051.604353
std 125888.164393
min -13600.000000
25% 29200.000000
50% 67000.000000
75% 128000.000000
max 2087000.000000
Name: HINCP, dtype: float64
HUGCL: Household with grandparent living with grandchildren
b: N/A (GQ/vacant)
0: Household without grandparent living with grandchildren
1: Household with grandparent living with grandchildren
...
count 13737.000000
mean 0.033559
std 0.180098
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: HUGCL, dtype: float64
HUPAC: HH presence and age of children
b: N/A (GQ/vacant)
1: With children under 6 years only
2: With children 6 to 17 years only
...
count 13737.000000
mean 3.591541
std 0.888889
min 1.000000
25% 4.000000
50% 4.000000
75% 4.000000
max 4.000000
Name: HUPAC, dtype: float64
HUPAOC: HH presence and age of own children
b: N/A (GQ/vacant)
1: Presence of own children under 6 years only
2: Presence of own children 6 to 17 years only
...
count 13737.000000
mean 3.652617
std 0.836272
min 1.000000
25% 4.000000
50% 4.000000
75% 4.000000
max 4.000000
Name: HUPAOC, dtype: float64
HUPARC: HH presence and age of related children
b: N/A (GQ/vacant)
1: Presence of related children under 6 years only
2: Presence of related children 6 to 17 years only
...
count 13737.000000
mean 3.594599
std 0.886682
min 1.000000
25% 4.000000
50% 4.000000
75% 4.000000
max 4.000000
Name: HUPARC, dtype: float64
KIT: Complete kitchen facilities
b: N/A (GQ)
1: Yes, has stove or range, refrigerator, and sink with a faucet
2: No
...
count 14844.000000
mean 1.011587
std 0.107022
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: KIT, dtype: float64
LNGI: Limited English speaking households
b: N/A (GQ/vacant)
1: At least one person in the household 14 and over speaks English only or speaks English 'very well'
2: No one in the household 14 and over speaks English only or speaks English 'very well'
...
count 13737.000000
mean 1.023368
std 0.151073
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: LNGI, dtype: float64
MULTG: Multigenerational Household
b: N/A (GQ/Vacant/NP=0)
1: No, not a multigenerational household
2: Yes, is a multigenerational household
...
count 13737.000000
mean 1.032977
std 0.178582
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: MULTG, dtype: float64
MV: When moved into this house or apartment
b: N/A (GQ/vacant)
1: 12 months or less
2: 13 to 23 months
...
count 13737.000000
mean 3.800466
std 1.936343
min 1.000000
25% 2.000000
50% 4.000000
75% 5.000000
max 7.000000
Name: MV, dtype: float64
NOC: Number of own children in household (unweighted)
bb: N/A(GQ/vacant)
00: No own children
01..19: Number of own children in household
...
count 13737.000000
mean 0.277572
std 0.727938
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 8.000000
Name: NOC, dtype: float64
NPF: Number of persons in family (unweighted)
bb: N/A (GQ/vacant/non-family household)
02..20: Number of persons in family
count 5953.000000
mean 2.893331
std 1.224969
min 2.000000
25% 2.000000
50% 2.000000
75% 3.000000
max 12.000000
Name: NPF, dtype: float64
NPP: Grandparent headed household with no parent present
b: N/A (GQ/vacant)
0: Not a grandparent headed household with no parent present
1: Grandparent headed household with no parent present
...
count 13737.000000
mean 0.004586
std 0.067568
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: NPP, dtype: float64
NR: Presence of nonrelative in household
b: N/A (GQ/vacant)
0: None
1: 1 or more nonrelatives
...
count 13737.000000
mean 0.159132
std 0.365813
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: NR, dtype: float64
NRC: Number of related children in household (unweighted)
bb: N/A (GQ/vacant)
00: No related children
01..19: Number of related children in household
...
count 13737.000000
mean 0.333552
std 0.806915
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 10.000000
Name: NRC, dtype: float64
OCPIP: Selected monthly owner costs as a percentage of household income during the past 12 months
bbb: N/A (GQ/vacant/not owned or being bought/ no household income)
001..100: 1% to 100%
101: 101% or more
...
count 6500.000000
mean 26.966923
std 23.259257
min 1.000000
25% 12.000000
50% 20.000000
75% 32.000000
max 101.000000
Name: OCPIP, dtype: float64
PARTNER: Unmarried partner household
b: N/A (GQ/vacant)
0: No unmarried partner in household
1: Male householder, male partner
...
count 13737.000000
mean 0.188032
std 0.751881
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 4.000000
Name: PARTNER, dtype: float64
PLM: Complete plumbing facilities
b: N/A (GQ)
1: Yes, has hot and cold running water, a flush toilet, and a bathtub or shower
2: No
...
count 14844.000000
mean 1.008690
std 0.092819
min 1.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 2.000000
Name: PLM, dtype: float64
PSF: Presence of subfamilies in Household
b: N/A (GQ/vacant)
0: No subfamilies
1: 1 or more subfamilies
...
count 13737.000000
mean 0.026789
std 0.161472
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: PSF, dtype: float64
R18: Presence of persons under 18 years in household (unweighted)
b: N/A (GQ/vacant)
0: No person under 18 in household
1: 1 or more persons under 18 in household
...
count 13737.000000
mean 0.193638
std 0.395163
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Name: R18, dtype: float64
R60: Presence of persons 60 years and over in household (unweighted)
b: N/A (GQ/vacant)
0: No person 60 and over
1: 1 person 60 and over
...
count 13737.000000
mean 0.415447
std 0.647227
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 2.000000
Name: R60, dtype: float64
R65: Presence of persons 65 years and over in household (unweighted)
b: N/A (GQ/vacant)
0: No person 65 and over
1: 1 person 65 and over
...
count 13737.000000
mean 0.297809
std 0.568071
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 2.000000
Name: R65, dtype: float64
RESMODE: Response mode
b: N/A (GQ)
1: Mail
2: CATI/CAPI
...
count 14844.000000
mean 1.523579
std 0.657194
min 1.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 3.000000
Name: RESMODE, dtype: float64
SMOCP: Selected monthly owner costs
['Note: Use values from ADJHSG to adjust SMOCP to constant dollars.']
bbbbb: N/A (GQ/vacant/not owned or being bought)
00000: None
00001..99999: $1 - $99999 (Components are rounded)
...
count 6561.000000
mean 2099.427526
std 1528.415666
min 0.000000
25% 956.000000
50% 1821.000000
75% 2810.000000
max 12113.000000
Name: SMOCP, dtype: float64
SMX: Second or junior mortgage or home equity loan status
b: N/A (GQ/vacant/not owned or being bought)
1: Yes, a second mortgage
2: Yes, a home equity loan
...
count 4971.000000
mean 2.709113
std 0.597341
min 1.000000
25% 3.000000
50% 3.000000
75% 3.000000
max 4.000000
Name: SMX, dtype: float64
SRNT: Specified rent unit
b: N/A
0: Not specified rent unit
1: Specified rent unit
...
count 14844.000000
mean 0.509229
std 0.499932
min 0.000000
25% 0.000000
50% 1.000000
75% 1.000000
max 1.000000
Name: SRNT, dtype: float64
SVAL: Specified value owner unit
b: N/A
0: Not specified value owner unit
1: Specified value owner unit
...
count 14844.000000
mean 0.328887
std 0.469825
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 1.000000
Name: SVAL, dtype: float64
TAXP: Property taxes (yearly amount)
['Note: No adjustment factor is applied to TAXP.']
bb: N/A (GQ/vacant/not owned or being bought)
01: None
02: $ 1 - $ 49
...
count 6561.000000
mean 36.513032
std 19.481382
min 1.000000
25% 22.000000
50% 33.000000
75% 53.000000
max 68.000000
Name: TAXP, dtype: float64
WIF: Workers in family during the past 12 months
b: N/A (GQ/vacant/non-family household)
0: No workers
1: 1 worker
...
count 5953.000000
mean 1.432555
std 0.824398
min 0.000000
25% 1.000000
50% 2.000000
75% 2.000000
max 3.000000
Name: WIF, dtype: float64
WKEXREL: Work experience of householder and spouse
bb: N/A (GQ/vacant/not a family)
01: Householder and spouse worked FT
02: Householder worked FT; spouse worked < FT
...
count 5953.000000
mean 7.787166
std 5.420605
min 1.000000
25% 2.000000
50% 8.000000
75% 13.000000
max 15.000000
Name: WKEXREL, dtype: float64
WORKSTAT: Work status of householder or spouse in family households
bb: N/A (GQ/not a family household)
01: Husband and wife both in labor force, both employed or in Armed Forces
02: Husband and wife both in labor force, husband employed or in Armed Forces, wife unemployed
...
count 5929.000000
mean 7.442570
std 5.594645
min 1.000000
25% 1.000000
50% 9.000000
75% 13.000000
max 15.000000
Name: WORKSTAT, dtype: float64
num columns described = 84
# Load the reference estimates published for user verification and keep
# only the rows whose 'state' column is 'District of Columbia'.
print("`dfe`: Estimates for user verification filtered for 'District of Columbia'.")
dfe = pd.read_csv(path_ecsv)
tfmask_dc = (dfe['state'] == 'District of Columbia')
dfe_dc = dfe[tfmask_dc]
# Trailing bare expression: the notebook renders the filtered table.
dfe_dc
`dfe`: Estimates for user verification filtered for 'District of Columbia'.
| | st | state | characteristic | pums_est_09_to_13 | pums_se_09_to_13 | pums_moe_09_to_13 |
|---|---|---|---|---|---|---|
| 288 | 11 | District of Columbia | Total population | 619,371 | 0 | 0 |
| 289 | 11 | District of Columbia | Housing unit population (RELP=0-15) | 579,281 | 0 | 0 |
| 290 | 11 | District of Columbia | GQ population (RELP=16-17) | 40,090 | 0 | 0 |
| 291 | 11 | District of Columbia | GQ institutional population (RELP=16) | 7,443 | 80 | 132 |
| 292 | 11 | District of Columbia | GQ noninstitutional population (RELP=17) | 32,647 | 80 | 132 |
| 293 | 11 | District of Columbia | Total males (SEX=1) | 292,566 | 361 | 595 |
| 294 | 11 | District of Columbia | Total females (SEX=2) | 326,805 | 361 | 595 |
| 295 | 11 | District of Columbia | Age 0-4 | 36,530 | 253 | 417 |
| 296 | 11 | District of Columbia | Age 5-9 | 27,658 | 636 | 1046 |
| 297 | 11 | District of Columbia | Age 10-14 | 24,621 | 598 | 984 |
| 298 | 11 | District of Columbia | Age 15-19 | 40,950 | 825 | 1357 |
| 299 | 11 | District of Columbia | Age 20-24 | 58,828 | 779 | 1281 |
| 300 | 11 | District of Columbia | Age 25-34 | 134,025 | 526 | 865 |
| 301 | 11 | District of Columbia | Age 35-44 | 84,310 | 534 | 878 |
| 302 | 11 | District of Columbia | Age 45-54 | 75,981 | 435 | 716 |
| 303 | 11 | District of Columbia | Age 55-59 | 35,191 | 599 | 985 |
| 304 | 11 | District of Columbia | Age 60-64 | 31,070 | 590 | 970 |
| 305 | 11 | District of Columbia | Age 65-74 | 38,245 | 295 | 485 |
| 306 | 11 | District of Columbia | Age 75-84 | 22,283 | 420 | 690 |
| 307 | 11 | District of Columbia | Age 85 and over | 9,679 | 377 | 619 |
| 308 | 11 | District of Columbia | Total housing units (TYPE=1) | 298,327 | 113 | 185 |
| 309 | 11 | District of Columbia | Total occupied units | 263,650 | 965 | 1588 |
| 310 | 11 | District of Columbia | Owner occupied units (TEN in 1,2) | 110,362 | 1363 | 2242 |
| 311 | 11 | District of Columbia | Renter occupied units (TEN in 3,4) | 153,288 | 1486 | 2444 |
| 312 | 11 | District of Columbia | Owned with a mortgage (TEN=1) | 85,483 | 1208 | 1988 |
| 313 | 11 | District of Columbia | Owned free and clear (TEN=2) | 24,879 | 565 | 929 |
| 314 | 11 | District of Columbia | Rented for cash (TEN=3) | 149,500 | 1511 | 2485 |
| 315 | 11 | District of Columbia | No cash rent (TEN=4) | 3,788 | 262 | 431 |
| 316 | 11 | District of Columbia | Total vacant units | 34,677 | 920 | 1514 |
| 317 | 11 | District of Columbia | For rent (VACS=1) | 10,686 | 618 | 1017 |
| 318 | 11 | District of Columbia | For sale only (VACS=3) | 2,953 | 325 | 534 |
| 319 | 11 | District of Columbia | All Other Vacant (VACS in 2,4,5,6,7) | 21,038 | 849 | 1397 |
print("`dfe`: Verify characteristic estimates, direct standard errors, and margin of error.")
# Verify the estimates following
# https://www.census.gov/programs-surveys/acs/
#     technical-documentation/pums/documentation.2013.html
#     tech_docs/pums/accuracy/2009_2013AccuracyPUMS.pdf
print()
# For each record type, map every reference characteristic to a Python
# expression (stored as a string and evaluated in the loop below) that
# builds a boolean row mask over `dfp` (person records) or `dfh` (housing
# records). The characteristic names are compared for equality against
# `dfe_dc['characteristic']`, so they must match the reference table exactly.
tfmask_test_strs = collections.OrderedDict([
    ('PERSON RECORD', collections.OrderedDict([
        ('Total population', "np.asarray([True]*len(dfp))"),
        ('Housing unit population (RELP=0-15)', "np.logical_and(0 <= dfp['RELP'], dfp['RELP'] <= 15)"),
        ('GQ population (RELP=16-17)', "np.logical_and(16 <= dfp['RELP'], dfp['RELP'] <= 17)"),
        ('GQ institutional population (RELP=16)', "dfp['RELP'] == 16"),
        ('GQ noninstitutional population (RELP=17)', "dfp['RELP'] == 17"),
        ('Total males (SEX=1)', "dfp['SEX'] == 1"),
        ('Total females (SEX=2)', "dfp['SEX'] == 2"),
        ('Age 0-4', "np.logical_and(0 <= dfp['AGEP'], dfp['AGEP'] <= 4)"),
        ('Age 5-9', "np.logical_and(5 <= dfp['AGEP'], dfp['AGEP'] <= 9)"),
        ('Age 10-14', "np.logical_and(10 <= dfp['AGEP'], dfp['AGEP'] <= 14)"),
        ('Age 15-19', "np.logical_and(15 <= dfp['AGEP'], dfp['AGEP'] <= 19)"),
        ('Age 20-24', "np.logical_and(20 <= dfp['AGEP'], dfp['AGEP'] <= 24)"),
        ('Age 25-34', "np.logical_and(25 <= dfp['AGEP'], dfp['AGEP'] <= 34)"),
        ('Age 35-44', "np.logical_and(35 <= dfp['AGEP'], dfp['AGEP'] <= 44)"),
        ('Age 45-54', "np.logical_and(45 <= dfp['AGEP'], dfp['AGEP'] <= 54)"),
        ('Age 55-59', "np.logical_and(55 <= dfp['AGEP'], dfp['AGEP'] <= 59)"),
        ('Age 60-64', "np.logical_and(60 <= dfp['AGEP'], dfp['AGEP'] <= 64)"),
        ('Age 65-74', "np.logical_and(65 <= dfp['AGEP'], dfp['AGEP'] <= 74)"),
        ('Age 75-84', "np.logical_and(75 <= dfp['AGEP'], dfp['AGEP'] <= 84)"),
        ('Age 85 and over', "85 <= dfp['AGEP']")])),
    ('HOUSING RECORD', collections.OrderedDict([
        ('Total housing units (TYPE=1)', "dfh['TYPE'] == 1"),
        ('Total occupied units', "dfh['TEN'].notnull()"),
        ('Owner occupied units (TEN in 1,2)', "np.logical_or(dfh['TEN'] == 1, dfh['TEN'] == 2)"),
        ('Renter occupied units (TEN in 3,4)', "np.logical_or(dfh['TEN'] == 3, dfh['TEN'] == 4)"),
        ('Owned with a mortgage (TEN=1)', "dfh['TEN'] == 1"),
        ('Owned free and clear (TEN=2)', "dfh['TEN'] == 2"),
        ('Rented for cash (TEN=3)', "dfh['TEN'] == 3"),
        ('No cash rent (TEN=4)', "dfh['TEN'] == 4"),
        ('Total vacant units', "dfh['TEN'].isnull()"),
        ('For rent (VACS=1)', "dfh['VACS'] == 1"),
        ('For sale only (VACS=3)', "dfh['VACS'] == 3"),
        ('All Other Vacant (VACS in 2,4,5,6,7)',
         "functools.reduce(np.logical_or, (dfh['VACS'] == vacs for vacs in [2,4,5,6,7]))")]))])
for record_type in records_dfs:
    print("'{rt}'".format(rt=record_type))
    df = records_dfs[record_type]['dataframe']
    wt = records_dfs[record_type]['weight']
    wts = records_dfs[record_type]['replicate_weights']
    for char in tfmask_test_strs[record_type]:
        print(" '{char}'".format(char=char))
        # Select the reference verification data
        # and the records for the characteristic.
        tfmask_ref = dfe_dc['characteristic'] == char
        if not tfmask_ref.any():
            # Fail with a descriptive message instead of the opaque
            # out-of-bounds error that `.values[0]` would raise below.
            raise IndexError(
                "Characteristic not found in reference estimates: '{char}'".format(char=char))
        # `eval` only ever sees the string literals defined above in this
        # cell; do not feed it externally supplied expressions.
        tfmask_test = eval(tfmask_test_strs[record_type][char])
        # Calculate and verify the estimate ('est') for the characteristic.
        # The estimate is the sum of the record type's sample weight column
        # `wt` over the selected records.
        col = 'pums_est_09_to_13'
        print(" '{col}':".format(col=col), end=' ')
        # The reference estimate carries thousands separators (e.g. '619,371');
        # `str(...)` keeps the parse working if the column is already numeric.
        ref_est = int(str(dfe_dc.loc[tfmask_ref, col].values[0]).replace(',', ''))
        test_est = df.loc[tfmask_test, wt].sum()
        # The assertion message carries the compared values because the
        # "(ref, test)" line is only printed after the check passes.
        assert np.isclose(ref_est, test_est, rtol=0, atol=1), (
            "'{char}' '{col}': (ref, test) = {tup}".format(
                char=char, col=col, tup=(ref_est, test_est)))
        print("(ref, test) = {tup}".format(tup=(ref_est, test_est)))
        # Calculate and verify the "direct standard error" ('se') of the estimate.
        # The direct standard error is a modified root-mean-square deviation
        # of the per-replicate estimates (column sums of the replicate weight
        # columns `wts`) about the full-sample estimate, scaled by
        # 4/(number of replicate weights).
        col = 'pums_se_09_to_13'
        print(" '{col}' :".format(col=col), end=' ')
        ref_se = dfe_dc.loc[tfmask_ref, col].values[0]
        test_reps = df.loc[tfmask_test, wts].sum()
        test_se = ((4/len(wts))*((test_reps - test_est)**2).sum())**0.5
        assert np.isclose(ref_se, test_se, rtol=0, atol=1), (
            "'{char}' '{col}': (ref, test) = {tup}".format(
                char=char, col=col, tup=(ref_se, test_se)))
        print("(ref, test) = {tup}".format(tup=(ref_se, test_se)))
        # Calculate and verify the margin of error ('moe') at the
        # 90% confidence level (+/- 1.645 standard errors).
        col = 'pums_moe_09_to_13'
        print(" '{col}':".format(col=col), end=' ')
        ref_moe = dfe_dc.loc[tfmask_ref, col].values[0]
        test_moe = 1.645*test_se
        assert np.isclose(ref_moe, test_moe, rtol=0, atol=1), (
            "'{char}' '{col}': (ref, test) = {tup}".format(
                char=char, col=col, tup=(ref_moe, test_moe)))
        print("(ref, test) = {tup}".format(tup=(ref_moe, test_moe)))
`dfe`: Verify characteristic estimates, direct standard errors, and margin of error.
'PERSON RECORD'
'Total population'
'pums_est_09_to_13': (ref, test) = (619371, 619371)
'pums_se_09_to_13' : (ref, test) = (0, 0.0)
'pums_moe_09_to_13': (ref, test) = (0, 0.0)
'Housing unit population (RELP=0-15)'
'pums_est_09_to_13': (ref, test) = (579281, 579281)
'pums_se_09_to_13' : (ref, test) = (0, 0.0)
'pums_moe_09_to_13': (ref, test) = (0, 0.0)
'GQ population (RELP=16-17)'
'pums_est_09_to_13': (ref, test) = (40090, 40090)
'pums_se_09_to_13' : (ref, test) = (0, 0.0)
'pums_moe_09_to_13': (ref, test) = (0, 0.0)
'GQ institutional population (RELP=16)'
'pums_est_09_to_13': (ref, test) = (7443, 7443)
'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184)
'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871)
'GQ noninstitutional population (RELP=17)'
'pums_est_09_to_13': (ref, test) = (32647, 32647)
'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184)
'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871)
'Total males (SEX=1)'
'pums_est_09_to_13': (ref, test) = (292566, 292566)
'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036)
'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794)
'Total females (SEX=2)'
'pums_est_09_to_13': (ref, test) = (326805, 326805)
'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036)
'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794)
'Age 0-4'
'pums_est_09_to_13': (ref, test) = (36530, 36530)
'pums_se_09_to_13' : (ref, test) = (253, 253.37699185206222)
'pums_moe_09_to_13': (ref, test) = (417, 416.80515159664236)
'Age 5-9'
'pums_est_09_to_13': (ref, test) = (27658, 27658)
'pums_se_09_to_13' : (ref, test) = (636, 635.5916141674621)
'pums_moe_09_to_13': (ref, test) = (1046, 1045.5482053054752)
'Age 10-14'
'pums_est_09_to_13': (ref, test) = (24621, 24621)
'pums_se_09_to_13' : (ref, test) = (598, 598.0936799532328)
'pums_moe_09_to_13': (ref, test) = (984, 983.864103523068)
'Age 15-19'
'pums_est_09_to_13': (ref, test) = (40950, 40950)
'pums_se_09_to_13' : (ref, test) = (825, 825.0349386541154)
'pums_moe_09_to_13': (ref, test) = (1357, 1357.18247408602)
'Age 20-24'
'pums_est_09_to_13': (ref, test) = (58828, 58828)
'pums_se_09_to_13' : (ref, test) = (779, 778.715930233869)
'pums_moe_09_to_13': (ref, test) = (1281, 1280.9877052347144)
'Age 25-34'
'pums_est_09_to_13': (ref, test) = (134025, 134025)
'pums_se_09_to_13' : (ref, test) = (526, 525.9921102069878)
'pums_moe_09_to_13': (ref, test) = (865, 865.257021290495)
'Age 35-44'
'pums_est_09_to_13': (ref, test) = (84310, 84310)
'pums_se_09_to_13' : (ref, test) = (534, 533.5205244411877)
'pums_moe_09_to_13': (ref, test) = (878, 877.6412627057538)
'Age 45-54'
'pums_est_09_to_13': (ref, test) = (75981, 75981)
'pums_se_09_to_13' : (ref, test) = (435, 435.0808545546448)
'pums_moe_09_to_13': (ref, test) = (716, 715.7080057423907)
'Age 55-59'
'pums_est_09_to_13': (ref, test) = (35191, 35191)
'pums_se_09_to_13' : (ref, test) = (599, 598.5786915686191)
'pums_moe_09_to_13': (ref, test) = (985, 984.6619476303784)
'Age 60-64'
'pums_est_09_to_13': (ref, test) = (31070, 31070)
'pums_se_09_to_13' : (ref, test) = (590, 589.6810154651412)
'pums_moe_09_to_13': (ref, test) = (970, 970.0252704401572)
'Age 65-74'
'pums_est_09_to_13': (ref, test) = (38245, 38245)
'pums_se_09_to_13' : (ref, test) = (295, 295.0997289053313)
'pums_moe_09_to_13': (ref, test) = (485, 485.43905404927)
'Age 75-84'
'pums_est_09_to_13': (ref, test) = (22283, 22283)
'pums_se_09_to_13' : (ref, test) = (420, 419.69280432239964)
'pums_moe_09_to_13': (ref, test) = (690, 690.3946631103474)
'Age 85 and over'
'pums_est_09_to_13': (ref, test) = (9679, 9679)
'pums_se_09_to_13' : (ref, test) = (377, 376.5637396245156)
'pums_moe_09_to_13': (ref, test) = (619, 619.4473516823282)
'HOUSING RECORD'
'Total housing units (TYPE=1)'
'pums_est_09_to_13': (ref, test) = (298327, 298327)
'pums_se_09_to_13' : (ref, test) = (113, 112.68873058118989)
'pums_moe_09_to_13': (ref, test) = (185, 185.37296180605736)
'Total occupied units'
'pums_est_09_to_13': (ref, test) = (263650, 263650)
'pums_se_09_to_13' : (ref, test) = (965, 965.0778984102786)
'pums_moe_09_to_13': (ref, test) = (1588, 1587.5531428849083)
'Owner occupied units (TEN in 1,2)'
'pums_est_09_to_13': (ref, test) = (110362, 110362)
'pums_se_09_to_13' : (ref, test) = (1363, 1363.1910174293257)
'pums_moe_09_to_13': (ref, test) = (2242, 2242.449223671241)
'Renter occupied units (TEN in 3,4)'
'pums_est_09_to_13': (ref, test) = (153288, 153288)
'pums_se_09_to_13' : (ref, test) = (1486, 1485.6482760061347)
'pums_moe_09_to_13': (ref, test) = (2444, 2443.8914140300917)
'Owned with a mortgage (TEN=1)'
'pums_est_09_to_13': (ref, test) = (85483, 85483)
'pums_se_09_to_13' : (ref, test) = (1208, 1208.399126944405)
'pums_moe_09_to_13': (ref, test) = (1988, 1987.8165638235462)
'Owned free and clear (TEN=2)'
'pums_est_09_to_13': (ref, test) = (24879, 24879)
'pums_se_09_to_13' : (ref, test) = (565, 565.0110618386157)
'pums_moe_09_to_13': (ref, test) = (929, 929.4431967245227)
'Rented for cash (TEN=3)'
'pums_est_09_to_13': (ref, test) = (149500, 149500)
'pums_se_09_to_13' : (ref, test) = (1511, 1510.8262970970554)
'pums_moe_09_to_13': (ref, test) = (2485, 2485.309258724656)
'No cash rent (TEN=4)'
'pums_est_09_to_13': (ref, test) = (3788, 3788)
'pums_se_09_to_13' : (ref, test) = (262, 262.1715087495207)
'pums_moe_09_to_13': (ref, test) = (431, 431.2721318929615)
'Total vacant units'
'pums_est_09_to_13': (ref, test) = (34677, 34677)
'pums_se_09_to_13' : (ref, test) = (920, 920.3688391074527)
'pums_moe_09_to_13': (ref, test) = (1514, 1514.0067403317596)
'For rent (VACS=1)'
'pums_est_09_to_13': (ref, test) = (10686, 10686)
'pums_se_09_to_13' : (ref, test) = (618, 618.3948172486571)
'pums_moe_09_to_13': (ref, test) = (1017, 1017.2594743740409)
'For sale only (VACS=3)'
'pums_est_09_to_13': (ref, test) = (2953, 2953)
'pums_se_09_to_13' : (ref, test) = (325, 324.77245880770124)
'pums_moe_09_to_13': (ref, test) = (534, 534.2506947386686)
'All Other Vacant (VACS in 2,4,5,6,7)'
'pums_est_09_to_13': (ref, test) = (21038, 21038)
'pums_se_09_to_13' : (ref, test) = (849, 849.4756618055635)
'pums_moe_09_to_13': (ref, test) = (1397, 1397.3874636701519)
# Render this notebook to HTML once per nbconvert template. Each output
# file sits next to the notebook and is named '<notebook stem>-<template>.html'.
for template in ('basic', 'full'):
    path_root = os.path.splitext(path_ipynb)[0]
    path_html = ''.join([path_root, '-', template, '.html'])
    cmd = ['jupyter', 'nbconvert']
    cmd += ['--to', 'html']
    cmd += ['--template', template]
    cmd += [path_ipynb, '--output', path_html]
    # Echo the exact command line before running it.
    print(' '.join(cmd))
    # check=True raises CalledProcessError if nbconvert exits non-zero.
    subprocess.run(cmd, check=True)
    print()
jupyter nbconvert --to html --template basic /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python.ipynb --output /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python-basic.html jupyter nbconvert --to html --template full /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python.ipynb --output /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python-full.html