# Change the working directory to the user's home directory (IPython `cd` automagic).
# The relative `dsdemos` path added to `sys.path` below is resolved from here.
cd ~
# Import standard packages.
import collections
import functools
import os
import pdb # Debug with pdb.
import subprocess
import sys
import time
# Import installed packages.
import numpy as np
import pandas as pd
# Import local packages.
# Prepend the `dsdemos` directory (relative to the current working directory)
# to the module search path so that `import dsdemos` below can find it.
# Autoreload local packages after editing (`%autoreload 2` must be set before the import).
# `dsdemos` version: TODO(review): the version/commit reference is missing from this comment.
sys.path.insert(0, os.path.join(os.path.curdir, r'dsdemos'))
%reload_ext autoreload
%autoreload 2
import dsdemos as dsd
# Log when the notebook was run (formatted from UTC time) and which
# interpreter and package versions were used.
print(time.strftime(r'%Y-%m-%dT%H:%M:%S%Z', time.gmtime()))
for (label, version) in [
    ("Python:", sys.version_info),
    ("numpy:", np.__version__),
    ("pandas:", pd.__version__)]:
    print(label, version)

2016-01-10T19:00:00
Python: sys.version_info(major=3, minor=5, micro=1, releaselevel='final', serial=0)
numpy: 1.10.2
pandas: 0.17.1


File sources:

# File paths
# Notebook file, located under the user's home directory.
basename = r'20160110-etl-census-with-python'
filename = basename
path_static = os.path.join(os.path.expanduser(r'~'), r'')
path_ipynb = os.path.join(path_static, basename, filename+'.ipynb')
# Root of the mounted data disk and the ACS directory below it.
path_disk = os.path.abspath(r'/mnt/disk-20151227t211000z/')
path_acs = os.path.join(path_disk, r'www2-census-gov/programs-surveys/acs/')
# Input files, all given relative to the ACS directory:
# person CSV, housing CSV, estimates CSV, and the data dictionary text file.
(path_pcsv, path_hcsv, path_ecsv, path_dtxt) = (
    os.path.join(path_acs, relpath) for relpath in (
        r'data/pums/2013/5-Year/ss13pdc.csv', # 'pdc' = 'person DC'
        r'data/pums/2013/5-Year/ss13hdc.csv', # 'hdc' = 'housing DC'
        r'tech_docs/pums/estimates/pums_estimates_9_13.csv',
        r'tech_docs/pums/data_dict/PUMS_Data_Dictionary_2009-2013.txt'))

# Weights
# For each record type: the name of the main weight column, plus the list of
# column names formed by appending 1..80 to that name.
pwt = 'PWGTP' # person weight
hwt = 'WGTP' # housing weight
(pwts, hwts) = [
    [wt+str(inum) for inum in range(1, 81)]
    for wt in (pwt, hwt)]

Extract, transform, and load

Data dictionary

print("`ddict`: Load the data dictionary and display the hierarchical structure.")
# Only `ddict` is used below.
# The hierarchical data frame is only for display.
ddict = dsd.census.parse_pumsdatadict(path=path_dtxt)
# Build one frame per record type (one row per variable), keyed by record type,
# so that `pd.concat` can stack them under a two-level index.
tmp = {
    record_type: pd.DataFrame.from_dict(
        ddict['record_types'][record_type], orient='index')
    for record_type in ddict['record_types']}
pd.concat(tmp, names=['record_type', 'var_name']).head()
`ddict`: Load the data dictionary and display the hierarchical structure.
length description var_codes notes
record_type var_name
HOUSING RECORD ACR 1 Lot size {'b': 'N/A (GQ/not a one-family house or mobil... NaN
ADJHSG 7 Adjustment factor for housing dollar amounts (... {'1086032': '2009 factor', '1068395': '2010 fa... [Note: The values of ADJHSG inflation-adjusts ...
ADJINC 7 Adjustment factor for income and earnings doll... {'1085467': '2009 factor (0.999480 * 1.0860317... [Note: The values of ADJINC inflation-adjusts ...
AGS 1 Sales of Agriculture Products (Yearly sales) {'b': 'N/A (GQ/vacant/not a one-family house o... [Note: No adjustment factor is applied to AGS.]
BATH 1 Bathtub or shower {'b': 'N/A (GQ)', '1': 'Yes', '2': 'No'} NaN
# Heading for the unstructured notes taken from the end of the data dictionary file.
# NOTE(review): the recorded output of this cell is a list of note strings, but no
# expression that displays such a list is present here -- presumably a trailing
# expression (something like `ddict['notes'][:10]`) was lost; TODO confirm and restore.
print("`ddict`: First 10 unstructured notes from end of file.")
`ddict`: First 10 unstructured notes from end of file.
['*  In cases where the SOC occupation code ends in X(s) or Y(s), two or more SOC',
 'occupation codes were aggregated to correspond to a specific Census occupation',
 'code. In these cases, the Census occupation description is used for the SOC',
 'occupation title."',
 '** These codes are pseudo codes developed by the Census Bureau and are not',
 '   official or equivalent NAICS or SOC codes.',
 'Legend to Identify NAICS Equivalents',
 '     M = Multiple NAICS codes',
 '     P = Part of a NAICS code - NAICS code split between two or more Census',
 '         codes']

PUMS data

print("`dfp`, `dfh`: Load person and housing records.")
time_start = time.perf_counter()
# Report the on-disk size, line count, and column count of each CSV before parsing it.
for path in (path_pcsv, path_hcsv):
    with open(path) as fobj:
        # The first line gives the number of comma-separated columns.
        first_line = fobj.readline()
        # Total lines = the line already read (if the file is non-empty) + the rest.
        nlines = int(bool(first_line)) + sum(1 for _ in fobj)
    ncols = 1 + first_line.count(',')
    print("\n".join([
        "    size (MB)   = {size:.1f}".format(size=os.path.getsize(path)/1e6),
        "    num lines   = {nlines}".format(nlines=nlines),
        "    num columns = {ncols}".format(ncols=ncols)]))

# For ss13pdc.csv, low_memory=False since otherwise pandas raises DtypeWarning.
dfp = pd.read_csv(path_pcsv, low_memory=False)
dfh = pd.read_csv(path_hcsv, low_memory=True)
# Report the in-memory footprint of each loaded frame.
for (name, df) in zip(('dfp', 'dfh'), (dfp, dfh)):
    print("{name} RAM usage (MB) = {mem:.1f}".format(
        name=name, mem=df.memory_usage().sum()/1e6))
time_stop = time.perf_counter()
print("Time elapsed (sec) = {diff:.1f}".format(diff=time_stop-time_start))
`dfp`, `dfh`: Load person and housing records.
    size (MB)   = 30.5
    num lines   = 30560
    num columns = 295
    size (MB)   = 13.5
    num lines   = 17501
    num columns = 205

dfp RAM usage (MB) = 72.1
dfh RAM usage (MB) = 28.7

Time elapsed (sec) = 2.0
print("`dfp`: First 5 person records.")
# Display the first rows of the person-record frame as the cell's result; the
# heading alone does not show any records (`DataFrame.head()` defaults to 5 rows).
dfp.head()
`dfp`: First 5 person records.
0 P 2009000000403 1 102 -9 11 1085467 20 38 1 ... 6 26 31 32 26 6 36 6 19 20
1 P 2009000001113 1 103 -9 11 1085467 13 78 1 ... 13 30 12 13 4 4 18 24 4 21
2 P 2009000001113 2 103 -9 11 1085467 25 39 1 ... 26 50 23 20 8 7 38 41 7 37
3 P 2009000001113 3 103 -9 11 1085467 17 8 1 ... 15 32 17 15 6 4 26 32 5 30
4 P 2009000001978 1 103 -9 11 1085467 37 53 1 ... 65 12 13 37 36 41 57 36 11 33

5 rows × 295 columns

# The housing records are held in `dfh` (loaded from the housing CSV), so the
# heading names `dfh` rather than `dfp`.
print("`dfh`: First 5 housing records.")
# Display the first rows of the housing-record frame as the cell's result
# (`DataFrame.head()` defaults to 5 rows).
dfh.head()
`dfp`: First 5 housing records.
0 600 H 2009000000403 5 102 -9 3 11 1086032 1085467 ... 6 25 30 32 26 6 36 6 18 19
1 NaN H 2009000001113 5 103 -9 3 11 1086032 1085467 ... 14 29 12 12 4 4 18 23 4 22
2 480 H 2009000001978 5 103 -9 3 11 1086032 1085467 ... 65 12 14 37 36 41 57 36 11 34
3 NaN H 2009000002250 5 105 -9 3 11 1086032 1085467 ... 4 4 4 4 23 14 11 4 20 21
4 2500 H 2009000002985 5 101 -9 3 11 1086032 1085467 ... 66 45 10 35 34 10 34 55 50 10

5 rows × 205 columns

# Describe every column of the person and housing frames, except columns that
# look like flag/edit columns or replicate weights, using the data dictionary
# for the column description, notes, and the first few variable codes.
print(r"""`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags.
Printed format:
COL: Column name.
    Column description.
    Multi-line optional column notes.
    1-3 line description of value meanings ('variable codes').
    Multi-line statistical description and data type.
num columns described = ncols""")
# Map each data-dictionary record type to its frame and weight column names.
records_dfs = collections.OrderedDict([
    ('PERSON RECORD',  {'dataframe': dfp, 'weight': pwt, 'replicate_weights': pwts}),
    ('HOUSING RECORD', {'dataframe': dfh, 'weight': hwt, 'replicate_weights': hwts})])
for record_type in records_dfs:
    df = records_dfs[record_type]['dataframe']
    ncols_desc = 0 # number of columns described
    for col in df.columns:
        # Look up the column in the data dictionary for this record type;
        # fall back to a placeholder description when it is not listed.
        if col in ddict['record_types'][record_type]:
            col_dict = ddict['record_types'][record_type][col]
            desc = col_dict['description']
        else:
            col_dict = None
            desc = 'Column not in data dictionary.'
        # Skip columns whose name starts with 'F' and whose description ends
        # with ' flag' or ' edit', and skip replicate-weight columns.
        if not (
            (col.startswith('F') and (desc.endswith(' flag') or desc.endswith(' edit')))
            or ('WGTP' in col and "Weight replicate" in desc)):
            print("{col}: {desc}".format(col=col, desc=desc))
            ncols_desc += 1
            if col_dict is not None:
                if 'notes' in col_dict:
                    print("    {notes}".format(notes=col_dict['notes']))
                # Print at most the first 3 variable codes, then an ellipsis.
                for (inum, var_code) in enumerate(col_dict['var_codes']):
                    var_code_desc = col_dict['var_codes'][var_code]
                    print("    {vc}: {vcd}".format(vc=var_code, vcd=var_code_desc))
                    if inum >= 2:
                        print("    ...")
                        break
            # Indent the multi-line `describe()` repr to align with the lines above.
            print('    '+repr(df[col].describe()).replace('\n', '\n    '))
    print("num columns described = {ncd}".format(ncd=ncols_desc))
`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags.
Printed format:
COL: Column name.
    Column description.
    Multi-line optional column notes.
    1-3 line description of value meanings ('variable codes').
    Multi-line statistical description and data type.
num columns described = ncols

RT: Record Type
    P: Person Record
    count     30559
    unique        1
    top           P
    freq      30559
    Name: RT, dtype: object
SERIALNO: Housing unit/GQ person serial number
    200900000001..201399999999: Unique identifier
    count    3.055900e+04
    mean     2.011081e+12
    std      1.407751e+09
    min      2.009000e+12
    25%      2.010000e+12
    50%      2.011001e+12
    75%      2.012001e+12
    max      2.013001e+12
    Name: SERIALNO, dtype: float64
SPORDER: Person number
    01..20: Person number
    count    30559.000000
    mean         1.850584
    std          1.235291
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max         13.000000
    Name: SPORDER, dtype: float64
PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10.
    ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population.  Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See to identify PUMA changes between the two vintages.']
    00100..08200: Public use microdata area codes
    7777: combination of 01801, 01802, and 01905 in Louisiana
    -0009: Code classification is Not Applicable for data collected in 2012 or later
    count    30559.000000
    mean        55.840243
    std         55.336541
    min         -9.000000
    25%         -9.000000
    50%        101.000000
    75%        103.000000
    max        105.000000
    Name: PUMA00, dtype: float64
PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data Collected in 2012 or later. Use in combination with PUMA00.
    ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population.  Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See to identify PUMA changes between the two vintages.']
    00100..70301: Public use microdata area codes
    -0009: Code classification is Not Applicable for data collected prior to 2012
    count    30559.000000
    mean        38.259923
    std         55.395391
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%        103.000000
    max        105.000000
    Name: PUMA10, dtype: float64
ST: State Code
    01: Alabama/AL
    02: Alaska/AK
    04: Arizona/AZ
    count    30559
    mean        11
    std          0
    min         11
    25%         11
    50%         11
    75%         11
    max         11
    Name: ST, dtype: float64
ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places)
    ['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars. ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.']
    1085467: 2009 factor (0.999480 * 1.08603175)
    1076540: 2010 factor (1.007624 * 1.06839475)
    1054614: 2011 factor (1.018237 * 1.03572510)
    count      30559.000000
    mean     1048186.138192
    std        29716.696630
    min      1007549.000000
    25%      1024887.000000
    50%      1054614.000000
    75%      1076540.000000
    max      1085467.000000
    Name: ADJINC, dtype: float64
PWGTP: Person's weight
    00001..09999: Integer weight of person
    count    30559.000000
    mean        20.268039
    std         13.310075
    min          1.000000
    25%         12.000000
    50%         16.000000
    75%         24.000000
    max        173.000000
    Name: PWGTP, dtype: float64
AGEP: Age
    00: Under 1 year
    01..99: 1 to 99 years (Top-coded***)
    count    30559.000000
    mean        38.728198
    std         21.780122
    min          0.000000
    25%         23.000000
    50%         35.000000
    75%         55.000000
    max         95.000000
    Name: AGEP, dtype: float64
CIT: Citizenship status
    1: Born in the U.S.
    2: Born in Puerto Rico, Guam, the U.S. Virgin Islands, or the Northern Marianas
    3: Born abroad of American parent(s)
    count    30559.000000
    mean         1.471252
    std          1.201267
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          5.000000
    Name: CIT, dtype: float64
CITWP05: Year of naturalization write-in for data collected prior to 2012
    bbbb: Not eligible - not naturalized
    1925: 1925 or earlier (Bottom-coded)
    1926: 1926 - 1930
    count    1595.000000
    mean     1110.813166
    std       994.495059
    min        -9.000000
    25%        -9.000000
    50%      1973.000000
    75%      1999.000000
    max      2011.000000
    Name: CITWP05, dtype: float64
CITWP12: Year of naturalization write-in for data collected in 2012 or later
    bbbb: Not eligible - not naturalized
    1928: 1928 or earlier (Bottom-coded)
    1929: 1929 - 1933
    count    1595.000000
    mean      875.462696
    std       996.639591
    min        -9.000000
    25%        -9.000000
    50%        -9.000000
    75%      1999.000000
    max      2013.000000
    Name: CITWP12, dtype: float64
COW: Class of worker
    b: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    1: Employee of a private for-profit company or business, or of an individual, for wages, salary, or commissions
    2: Employee of a private not-for-profit, tax-exempt, or charitable organization
    count    20557.000000
    mean         2.592937
    std          1.971727
    min          1.000000
    25%          1.000000
    50%          2.000000
    75%          5.000000
    max          9.000000
    Name: COW, dtype: float64
DDRS: Self-care difficulty
    b: N/A (Less than 5 years old)
    1: Yes
    2: No
    count    29078.000000
    mean         1.969668
    std          0.171503
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: DDRS, dtype: float64
DEAR: Hearing difficulty
    1: Yes
    2: No
    count    30559.000000
    mean         1.978010
    std          0.146654
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: DEAR, dtype: float64
DEYE: Vision difficulty
    1: Yes
    2: No
    count    30559.000000
    mean         1.975163
    std          0.155631
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: DEYE, dtype: float64
DOUT: Independent living difficulty
    b: N/A (Less than 15 years old)
    1: Yes
    2: No
    count    26658.000000
    mean         1.943432
    std          0.231020
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: DOUT, dtype: float64
DPHY: Ambulatory difficulty
    b: N/A (Less than 5 years old)
    1: Yes
    2: No
    count    29078.000000
    mean         1.922966
    std          0.266650
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: DPHY, dtype: float64
DRAT: Veteran service connected disability rating (percentage)
    b: N/A (No service-connected disability/never served in military)
    1: 0 percent
    2: 10 or 20 percent
    count    287.000000
    mean       3.400697
    std        1.587671
    min        1.000000
    25%        2.000000
    50%        3.000000
    75%        5.000000
    max        6.000000
    Name: DRAT, dtype: float64
DRATX: Veteran service connected disability rating (checkbox)
    b: N/A (Less than 17 years old/never served in military)
    1: Yes
    2: No
    count    2072.000000
    mean        1.861486
    std         0.345522
    min         1.000000
    25%         2.000000
    50%         2.000000
    75%         2.000000
    max         2.000000
    Name: DRATX, dtype: float64
DREM: Cognitive difficulty
    b: N/A (Less than 5 years old)
    1: Yes
    2: No
    count    29078.000000
    mean         1.943015
    std          0.231817
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: DREM, dtype: float64
ENG: Ability to speak English
    b: N/A (less than 5 years old/speaks only English)
    1: Very well
    2: Well
    count    4231.000000
    mean        1.429449
    std         0.750207
    min         1.000000
    25%         1.000000
    50%         1.000000
    75%         2.000000
    max         4.000000
    Name: ENG, dtype: float64
FER: Gave birth to child within the past 12 months
    ['NOTE: Problems in the collection of data on women who gave birth in the past year (FER) in 2012 led to suppressing this variable in 59 PUMAs within states Florida, Georgia, Kansas, Montana, North Carolina, Ohio and Texas. This only affects 2012 vintage data. See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using FER.']
    b: N/A (less than 15 years/greater than 50 years/ male)
    1: Yes
    2: No
    count    9036.000000
    mean        1.955069
    std         0.207165
    min         1.000000
    25%         2.000000
    50%         2.000000
    75%         2.000000
    max         2.000000
    Name: FER, dtype: float64
GCL: Grandparents living with grandchildren
    b: N/A (less than 30 years/institutional GQ)
    1: Yes
    2: No
    count    18439.000000
    mean         1.968599
    std          0.174403
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: GCL, dtype: float64
GCM: Length of time responsible for grandchildren
    b: N/A (less than 30 years/grandparent not responsible for grandchild/institutional GQ)
    1: Less than 6 months
    2: 6 to 11 months
    count    228.000000
    mean       4.026316
    std        1.078100
    min        1.000000
    25%        3.000000
    50%        4.000000
    75%        5.000000
    max        5.000000
    Name: GCM, dtype: float64
GCR: Grandparents responsible for grandchildren
    b: N/A (less than 30 years/institutional GQ/grandparent not living with grandchild)
    1: Yes
    2: No
    count    579.000000
    mean       1.606218
    std        0.489010
    min        1.000000
    25%        1.000000
    50%        2.000000
    75%        2.000000
    max        2.000000
    Name: GCR, dtype: float64
HINS1: Insurance through a current or former employer or union
    1: Yes
    2: No
    count    30559.000000
    mean         1.387120
    std          0.487099
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          2.000000
    Name: HINS1, dtype: float64
HINS2: Insurance purchased directly from an insurance company
    1: Yes
    2: No
    count    30559.000000
    mean         1.852548
    std          0.354562
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: HINS2, dtype: float64
HINS3: Medicare, for people 65 and older, or people with certain disabilities
    1: Yes
    2: No
    count    30559.000000
    mean         1.847999
    std          0.359028
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: HINS3, dtype: float64
HINS4: Medicaid, Medical Assistance, or any kind of government-assistance plan for those with low incomes or a disability
    1: Yes
    2: No
    count    30559.000000
    mean         1.763016
    std          0.425239
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: HINS4, dtype: float64
HINS5: TRICARE or other military health care
    1: Yes
    2: No
    count    30559.000000
    mean         1.973461
    std          0.160734
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: HINS5, dtype: float64
HINS6: VA (including those who have ever used or enrolled for VA health care)
    1: Yes
    2: No
    count    30559.000000
    mean         1.981675
    std          0.134127
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: HINS6, dtype: float64
HINS7: Indian Health Service
    1: Yes
    2: No
    count    30559.000000
    mean         1.999215
    std          0.028014
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: HINS7, dtype: float64
INTP: Interest, dividends, and net rental income past 12 months (signed)
    ['Note: Use values from ADJINC to adjust INTP to constant dollars.']
    bbbbbb: N/A (less than 15 years old)
    000000: None
    -09999..-00001: Loss $1 to $9999 (Rounded and bottom-coded)
    count     26658.000000
    mean       2798.324368
    std       18916.559752
    min       -7700.000000
    25%           0.000000
    50%           0.000000
    75%           0.000000
    max      325000.000000
    Name: INTP, dtype: float64
JWMNP: Travel time to work
    bbb: N/A (not a worker or worker who worked at home)
    001..200: 1 to 200 minutes to get to work (Top-coded)
    count    14545.000000
    mean        29.764043
    std         19.584350
    min          1.000000
    25%         15.000000
    50%         30.000000
    75%         40.000000
    max        142.000000
    Name: JWMNP, dtype: float64
JWRIP: Vehicle occupancy
    bb: N/A (not a worker or worker whose means of transportation to work was not car, truck, or van)
    01: Drove alone
    02: In 2-person carpool
    count    6211.000000
    mean        1.224602
    std         0.677173
    min         1.000000
    25%         1.000000
    50%         1.000000
    75%         1.000000
    max        10.000000
    Name: JWRIP, dtype: float64
JWTR: Means of transportation to work
    bb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job but not at work; Armed Forces, with a job but not at work)
    01: Car, truck, or van
    02: Bus or trolley bus
    count    15327.000000
    mean         3.863900
    std          3.554906
    min          1.000000
    25%          1.000000
    50%          2.000000
    75%          4.000000
    max         12.000000
    Name: JWTR, dtype: float64
LANX: Language other than English spoken at home
    b: N/A (less than 5 years old)
    1: Yes, speaks another language
    2: No, speaks only English
    count    29078.000000
    mean         1.854495
    std          0.352616
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: LANX, dtype: float64
MAR: Marital status
    1: Married
    2: Widowed
    3: Divorced
    count    30559.000000
    mean         3.659118
    std          1.737333
    min          1.000000
    25%          1.000000
    50%          5.000000
    75%          5.000000
    max          5.000000
    Name: MAR, dtype: float64
MARHD: Divorced in the past 12 months
    b: N/A (age less than 15 years; never married)
    1: Yes
    2: No
    count    12371.000000
    mean         1.982297
    std          0.131874
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: MARHD, dtype: float64
MARHM: Married in the past 12 months
    b: N/A (age less than 15 years; never married)
    1: Yes
    2: No
    count    12371.000000
    mean         1.954086
    std          0.209307
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: MARHM, dtype: float64
MARHT: Number of times married
    b: N/A (age less than 15 years; never married)
    1: One time
    2: Two times
    count    12371.000000
    mean         1.207259
    std          0.461325
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          3.000000
    Name: MARHT, dtype: float64
MARHW: Widowed in the past 12 months
    b: N/A (age less than 15 years; never married)
    1: Yes
    2: No
    count    12371.000000
    mean         1.990866
    std          0.095140
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: MARHW, dtype: float64
MARHYP05: Year last married for data collected prior to 2012
    bbbb: N/A (age less than 15 years; never married)
    1928: 1928 or earlier (Bottom-coded)
    1929: 1929
    count    12371.000000
    mean      1158.447337
    std        982.950909
    min         -9.000000
    25%         -9.000000
    50%       1963.000000
    75%       1994.000000
    max       2011.000000
    Name: MARHYP05, dtype: float64
MARHYP12: Year last married for data collected in 2012 or later
    bbbb: N/A (age less than 15 years; never married)
    1932: 1932 or earlier (Bottom-coded)
    1933: 1933
    count    12371.000000
    mean       819.956026
    std        984.800070
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%       1987.000000
    max       2013.000000
    Name: MARHYP12, dtype: float64
MIG: Mobility status (lived here 1 year ago)
    b: N/A (less than 1 year old)
    1: Yes, same house (nonmovers)
    2: No, outside US and Puerto Rico
    count    30229.00000
    mean         1.37163
    std          0.76873
    min          1.00000
    25%          1.00000
    50%          1.00000
    75%          1.00000
    max          3.00000
    Name: MIG, dtype: float64
MIL: Military service
    b: N/A (less than 17 years old)
    1: Now on active duty
    2: On active duty in the past, but not now
    count    26114.000000
    mean         3.842843
    std          0.549228
    min          1.000000
    25%          4.000000
    50%          4.000000
    75%          4.000000
    max          4.000000
    Name: MIL, dtype: float64
MLPA: Served September 2001 or later
    b: N/A (Less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.224798
    std         0.417562
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPA, dtype: float64
MLPB: Served August 1990 - August 2001 (including Persian Gulf War)
    b: N/A (Less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.161186
    std         0.367802
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPB, dtype: float64
MLPCD: Served May 1975 - July 1990
    b: N/A (less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.232345
    std         0.422442
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPCD, dtype: float64
MLPE: Served Vietnam era (August 1964 - April 1975)
    b: N/A (Less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.290027
    std         0.453897
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         1.000000
    max         1.000000
    Name: MLPE, dtype: float64
MLPFG: Served February 1955 - July 1964
    b: N/A (less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.145013
    std         0.352209
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPFG, dtype: float64
MLPH: Served Korean War (July 1950 - January 1955)
    b: N/A (Less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.117520
    std         0.322126
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPH, dtype: float64
MLPI: Served January 1947 - June 1950
    b: N/A (Less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.017790
    std         0.132222
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPI, dtype: float64
MLPJ: Served World War II (December 1941 - December 1946)
    b: N/A (Less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.086253
    std         0.280814
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPJ, dtype: float64
MLPK: Served November 1941 or earlier
    b: N/A (Less than 17 years old/no active duty)
    0: Did not serve this period
    1: Served this period
    count    1855.000000
    mean        0.003774
    std         0.061330
    min         0.000000
    25%         0.000000
    50%         0.000000
    75%         0.000000
    max         1.000000
    Name: MLPK, dtype: float64
NWAB: Temporary absence from work (UNEDITED-See "Employment Status Recode" (ESR))
    b: N/A (less than 16 years old/at work/on layoff)
    1: Yes
    2: No
    count    26399.000000
    mean         2.564074
    std          0.529434
    min          1.000000
    25%          2.000000
    50%          3.000000
    75%          3.000000
    max          3.000000
    Name: NWAB, dtype: float64
NWAV: Available for work (UNEDITED-See "Employment Status Recode" (ESR))
    b: N/A (less than 16 years/at work/not looking)
    1: Yes
    2: No, temporarily ill
    count    26399.000000
    mean         4.530702
    std          1.224743
    min          1.000000
    25%          5.000000
    50%          5.000000
    75%          5.000000
    max          5.000000
    Name: NWAV, dtype: float64
NWLA: On layoff from work (UNEDITED-See "Employment Status Recode" (ESR))
    b: N/A (less than 16 years old/at work)
    1: Yes
    2: No
    count    26399.000000
    mean         2.540134
    std          0.524830
    min          1.000000
    25%          2.000000
    50%          3.000000
    75%          3.000000
    max          3.000000
    Name: NWLA, dtype: float64
NWLK: Looking for work (UNEDITED-See "Employment Status Recode" (ESR))
    b: N/A (less than 16 years old/at work/temporarily absent/informed of recall)
    1: Yes
    2: No
    count    26399.000000
    mean         2.503315
    std          0.635689
    min          1.000000
    25%          2.000000
    50%          3.000000
    75%          3.000000
    max          3.000000
    Name: NWLK, dtype: float64
NWRE: Informed of recall (UNEDITED-See "Employment Status Recode" (ESR))
    b: N/A (less than 16 years old/at work/not on layoff)
    1: Yes
    2: No
    count    26399.000000
    mean         2.903974
    std          0.315014
    min          1.000000
    25%          3.000000
    50%          3.000000
    75%          3.000000
    max          3.000000
    Name: NWRE, dtype: float64
OIP: All other income past 12 months
    ['Note: Use values from ADJINC to adjust OIP to constant dollars.']
    bbbbbb: N/A (less than 15 years old)
    000000: None
    000001..999999: $1 to $999999 (Rounded and top-coded)
    count    26658.000000
    mean       675.345037
    std       4722.241622
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max      83000.000000
    Name: OIP, dtype: float64
PAP: Public assistance income past 12 months
    ['Note: Use values from ADJINC to adjust PAP to constant dollars.']
    bbbbb: N/A (less than 15 years old)
    00000: None
    00001..99999: $1 to $99999 (Rounded)
    count    26658.000000
    mean        76.790832
    std        692.300350
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max      25400.000000
    Name: PAP, dtype: float64
RELP: Relationship
    00: Reference person
    01: Husband/wife
    02: Biological son or daughter
    count    30559.000000
    mean         3.656795
    std          5.569053
    min          0.000000
    25%          0.000000
    50%          1.000000
    75%          5.000000
    max         17.000000
    Name: RELP, dtype: float64
RETP: Retirement income past 12 months
    ['Note: Use values from ADJINC to adjust RETP to constant dollars.']
    bbbbbb: N/A (less than 15 years old)
    000000: None
    000001..999999: $1 to $999999 (Rounded and top-coded)
    count     26658.000000
    mean       3493.095881
    std       15552.960973
    min           0.000000
    25%           0.000000
    50%           0.000000
    75%           0.000000
    max      249000.000000
    Name: RETP, dtype: float64
SCH: School enrollment
    b: N/A (less than 3 years old)
    1: No, has not attended in the last 3 months
    2: Yes, public school or public college
    count    29645.000000
    mean         1.376050
    std          0.691228
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          3.000000
    Name: SCH, dtype: float64
SCHG: Grade level attending
    bb: N/A (not attending school)
    01: Nursery school/preschool
    02: Kindergarten
    count    7544.00000
    mean       11.28526
    std         5.07951
    min         1.00000
    25%         7.00000
    50%        14.00000
    75%        15.00000
    max        16.00000
    Name: SCHG, dtype: float64
SCHL: Educational attainment
    bb: N/A (less than 3 years old)
    01: No schooling completed
    02: Nursery school, preschool
    count    29645.000000
    mean        17.468173
    std          5.585758
    min          1.000000
    25%         16.000000
    50%         19.000000
    75%         21.000000
    max         24.000000
    Name: SCHL, dtype: float64
SEMP: Self-employment income past 12 months (signed)
    ['Note: Use values from ADJINC to adjust SEMP to constant dollars.']
    bbbbbb: N/A (less than 15 years old)
    000000: None
    -10000..-00001: Loss $1 to $10000 (Rounded and bottom-coded)
    count     26658.000000
    mean       2956.785243
    std       30447.719592
    min       -9100.000000
    25%           0.000000
    50%           0.000000
    75%           0.000000
    max      727000.000000
    Name: SEMP, dtype: float64
SEX: Sex
    1: Male
    2: Female
    count    30559.000000
    mean         1.537878
    std          0.498571
    min          1.000000
    25%          1.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: SEX, dtype: float64
SSIP: Supplementary Security Income past 12 months
    ['Note: Use values from ADJINC to adjust SSIP to constant dollars.']
    bbbbb: N/A (less than 15 years old)
    00000: None
    00001..99999: $1 to $99999 (Rounded)
    count    26658.000000
    mean       296.556381
    std       1651.630937
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max      30000.000000
    Name: SSIP, dtype: float64
SSP: Social Security income past 12 months
    ['Note: Use values from ADJINC to adjust SSP to constant dollars.']
    bbbbb: N/A (less than 15 years old)
    00000: None
    00001..99999: $1 to $99999 (Rounded)
    count    26658.000000
    mean      1618.331458
    std       4844.120790
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max      50000.000000
    Name: SSP, dtype: float64
WAGP: Wages or salary income past 12 months
    ['Note: Use values from ADJINC to adjust WAGP to constant dollars.']
    bbbbbb: N/A (less than 15 years old)
    000000: None
    000001..999999: $1 to 999999 (Rounded and top-coded)
    count     26658.000000
    mean      41347.736139
    std       69993.911285
    min           0.000000
    25%           0.000000
    50%       15000.000000
    75%       60000.000000
    max      660000.000000
    Name: WAGP, dtype: float64
WKHP: Usual hours worked per week past 12 months
    bb: N/A (less than 16 years old/did not work during the past 12 months)
    01..98: 1 to 98 usual hours
    99: 99 or more usual hours
    count    17950.000000
    mean        39.674485
    std         13.046600
    min          1.000000
    25%         38.000000
    50%         40.000000
    75%         45.000000
    max         99.000000
    Name: WKHP, dtype: float64
WKL: When last worked
    b: N/A (less than 16 years old)
    1: Within the past 12 months
    2: 1-5 years ago
    count    26399.000000
    mean         1.550930
    std          0.842136
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          3.000000
    Name: WKL, dtype: float64
WKW: Weeks worked during past 12 months
    b: N/A (less than 16 years old/did not work during the past 12 months)
    1: 50 to 52 weeks
    2: 48 to 49 weeks
    count    17950.000000
    mean         1.952201
    std          1.698859
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          3.000000
    max          6.000000
    Name: WKW, dtype: float64
WRK: Worked last week
    b: N/A (not reported)
    1: Worked
    2: Did not work
    count    24046.000000
    mean         1.390086
    std          0.487779
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          2.000000
    Name: WRK, dtype: float64
YOEP05: Year of entry for data collected prior to 2012
    bbbb: Not eligible - Born in the US
    1919: 1919 or earlier (Bottom-coded)
    1920: 1920
    count    4268.000000
    mean     1126.390112
    std       990.940648
    min        -9.000000
    25%        -9.000000
    50%      1970.000000
    75%      1997.000000
    max      2011.000000
    Name: YOEP05, dtype: float64
YOEP12: Year of entry for data collected in 2012 or later
    bbbb: Not eligible - Born in the US
    1921: 1921 or earlier (Bottom-coded)
    1922: 1922 - 1923
    count    4268.000000
    mean      856.887769
    std       992.461884
    min        -9.000000
    25%        -9.000000
    50%        -9.000000
    75%      1995.000000
    max      2013.000000
    Name: YOEP12, dtype: float64
ANC: Ancestry recode
    1: Single
    2: Multiple
    3: Unclassified
    count    30559.000000
    mean         1.555810
    std          0.943709
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          4.000000
    Name: ANC, dtype: float64
ANC1P05: Recoded Detailed Ancestry for data collected prior to 2012 - first entry
    001: Alsatian
    003: Austrian
    005: Basque
    count    30559.000000
    mean       339.916359
    std        420.806625
    min         -9.000000
    25%         -9.000000
    50%         50.000000
    75%        902.000000
    max        999.000000
    Name: ANC1P05, dtype: float64
ANC1P12: Recoded Detailed Ancestry for data collected in 2012 or later - first entry
    001: Alsatian
    003: Austrian
    005: Basque
    count    30559.000000
    mean       251.538041
    std        397.645131
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%        615.000000
    max        999.000000
    Name: ANC1P12, dtype: float64
ANC2P05: Recoded Detailed Ancestry for data collected prior to 2012 - second entry
    001: Alsatian
    003: Austrian
    005: Basque
    count    30559.000000
    mean       478.113944
    std        494.015425
    min         -9.000000
    25%         -9.000000
    50%        125.000000
    75%        999.000000
    max        999.000000
    Name: ANC2P05, dtype: float64
ANC2P12: Recoded Detailed Ancestry for data collected in 2012 or later - second entry
    001: Alsatian
    003: Austrian
    005: Basque
    count    30559.000000
    mean       346.208515
    std        473.692419
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%        999.000000
    max        999.000000
    Name: ANC2P12, dtype: float64
DECADE: Decade of entry
    b: N/A (Born in the US)
    1: Before 1950
    2: 1950 - 1959
    count    4268.000000
    mean        5.698454
    std         1.477502
    min         1.000000
    25%         5.000000
    50%         6.000000
    75%         7.000000
    max         7.000000
    Name: DECADE, dtype: float64
DIS: Disability recode
    1: With a disability
    2: Without a disability
    count    30559.000000
    mean         1.871527
    std          0.334621
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: DIS, dtype: float64
DRIVESP: Number of vehicles calculated from JWRI
    b: N/A (Nonworker or worker who does not drive to work)
    1: 1.000 vehicles (Drove alone)
    2: 0.500 vehicles (In a 2-person carpool)
    count    6211.000000
    mean        1.217517
    std         0.608995
    min         1.000000
    25%         1.000000
    50%         1.000000
    75%         1.000000
    max         6.000000
    Name: DRIVESP, dtype: float64
ESP: Employment status of parents
    b: N/A (not own child of householder, and not child in subfamily) Living with two parents:
    1: Both parents in labor force
    2: Father only in labor force
    count    4318.000000
    mean        4.348541
    std         2.889376
    min         1.000000
    25%         1.000000
    50%         5.000000
    75%         7.000000
    max         8.000000
    Name: ESP, dtype: float64
ESR: Employment status recode
    b: N/A (less than 16 years old)
    1: Civilian employed, at work
    2: Civilian employed, with a job but not at work
    count    26399.000000
    mean         2.880071
    std          2.322380
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          6.000000
    max          6.000000
    Name: ESR, dtype: float64
FOD1P: Recoded field of degree - first entry
    bbbb: N/A (less than bachelor's degree)
    count    12871.000000
    mean      4518.419470
    std       1596.571638
    min       1100.000000
    25%       3301.000000
    50%       5404.000000
    75%       5507.000000
    max       6403.000000
    Name: FOD1P, dtype: float64
FOD2P: Recoded field of degree - second entry
    bbbb: N/A (less than bachelor's degree)
    count    2230.000000
    mean     4427.388789
    std      1610.279474
    min      1101.000000
    25%      2602.000000
    50%      5200.000000
    75%      5506.000000
    max      6403.000000
    Name: FOD2P, dtype: float64
HICOV: Health insurance coverage recode
    1: With health insurance coverage
    2: No health insurance coverage
    count    30559.00000
    mean         1.05815
    std          0.23403
    min          1.00000
    25%          1.00000
    50%          1.00000
    75%          1.00000
    max          2.00000
    Name: HICOV, dtype: float64
HISP: Recoded detailed Hispanic origin
    01: Not Spanish/Hispanic/Latino
    02: Mexican
    03: Puerto Rican
    count    30559.000000
    mean         1.666808
    std          2.950180
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max         24.000000
    Name: HISP, dtype: float64
INDP: Industry recode based on 2012 IND codes
    ['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on industry groupings within major categories visit our website at:']
    bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count    20557.000000
    mean      7685.842584
    std       1845.245220
    min        170.000000
    25%       7270.000000
    50%       7870.000000
    75%       9160.000000
    max       9920.000000
    Name: INDP, dtype: float64
JWAP: Time of arrival at work - hour and minute
    bbb: N/A (not a worker; worker who worked at home)
    001: 12:00 a.m. to 12:04 a.m.
    002: 12:05 a.m. to 12:09 a.m.
    count    14545.000000
    mean       106.071640
    std         33.565316
    min          1.000000
    25%         91.000000
    50%        100.000000
    75%        109.000000
    max        284.000000
    Name: JWAP, dtype: float64
JWDP: Time of departure for work - hour and minute
    bbb: N/A (not a worker; worker who worked at home)
    001: 12:00 a.m. to 12:29 a.m.
    002: 12:30 a.m. to 12:59 a.m.
    count    14545.000000
    mean        57.893022
    std         22.927208
    min          1.000000
    25%         43.000000
    50%         55.000000
    75%         64.000000
    max        150.000000
    Name: JWDP, dtype: float64
LANP05: Language spoken at home for data collected prior to 2012
    bbb: N/A (less than 5 years old/speaks only English)
    601: Jamaican Creole
    607: German
    count    4231.000000
    mean      362.528717
    std       336.415892
    min        -9.000000
    25%        -9.000000
    50%       620.000000
    75%       625.000000
    max       994.000000
    Name: LANP05, dtype: float64
LANP12: Language spoken at home for data collected in 2012 or later
    bbb: N/A (less than 5 years old/speaks only English)
    601: Jamaican Creole
    602: Krio
    count    4231.000000
    mean      290.464429
    std       337.743570
    min        -9.000000
    25%        -9.000000
    50%        -9.000000
    75%       625.000000
    max       994.000000
    Name: LANP12, dtype: float64
MIGPUMA00: Migration PUMA based on Census 2000 definition for data collected prior to 2012
    bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago)
    00001: Did not live in the United States or in Puerto Rico one year ago
    00002: Lived in Puerto Rico one year ago and current residence is in the U.S.
    count    5832.000000
    mean      465.729767
    std      1096.576145
    min        -9.000000
    25%        -9.000000
    50%       100.000000
    75%       100.000000
    max      8100.000000
    Name: MIGPUMA00, dtype: float64
MIGPUMA10: Migration PUMA based on 2010 Census definition for data collected in 2012 or later
    bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago)
    00001: Did not live in the United States or in Puerto Rico one year ago
    00002: Lived in Puerto Rico one year ago and current residence is in the U.S.
    count     5832.000000
    mean      1528.504630
    std       7819.009818
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%        100.000000
    max      59300.000000
    Name: MIGPUMA10, dtype: float64
MIGSP05: Migration recode for data collected prior to 2012 - State or foreign country code
    bbb: N/A (person less than 1 year old/lived in same house 1 year ago)
    001: Alabama/AL
    002: Alaska/AK
    count    5832.000000
    mean       16.364026
    std        57.178932
    min        -9.000000
    25%        -9.000000
    50%        11.000000
    75%        12.000000
    max       554.000000
    Name: MIGSP05, dtype: float64
MIGSP12: Migration recode for data collected in 2012 or later - State or foreign country code
    bbb: N/A (person less than 1 year old/lived in same house 1 year ago)
    001: Alabama/AL
    002: Alaska/AK
    count    5832.000000
    mean       11.033951
    std        50.483716
    min        -9.000000
    25%        -9.000000
    50%        -9.000000
    75%        11.000000
    max       555.000000
    Name: MIGSP12, dtype: float64
MSP: Married, spouse present/spouse absent
    b: N/A (age less than 15 years)
    1: Now married, spouse present
    2: Now married, spouse absent
    count    26658.000000
    mean         4.202491
    std          2.152624
    min          1.000000
    25%          1.000000
    50%          6.000000
    75%          6.000000
    max          6.000000
    Name: MSP, dtype: float64
NAICSP: NAICS Industry code based on 2012 NAICS codes
    ['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on NAICS groupings within major categories visit our website at:']
    bbbbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count     20557
    unique      223
    top        722Z
    freq        993
    Name: NAICSP, dtype: object
NATIVITY: Nativity
    1: Native
    2: Foreign born
    count    30559.000000
    mean         1.123237
    std          0.328714
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: NATIVITY, dtype: float64
NOP: Nativity of parent
    b: N/A (greater than 17 years old/not an own child of householder, and not child in subfamily)
    1: Living with two parents: Both parents NATIVE
    2: Living with two parents: Father only FOREIGN BORN
    count    4312.000000
    mean        4.474954
    std         2.696136
    min         1.000000
    25%         1.000000
    50%         5.000000
    75%         7.000000
    max         8.000000
    Name: NOP, dtype: float64
OC: Own child
    0: No (includes GQ)
    1: Yes
    count    30559.000000
    mean         0.124775
    std          0.330469
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: OC, dtype: float64
OCCP02: Occupation recode for data collected in 2009 based on 2002 OCC codes
    bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count     20557
    unique      314
    top        N.A.
    freq      16866
    Name: OCCP02, dtype: object
OCCP10: Occupation recode for data collected in 2010 and 2011 based on 2010 OCC codes
    bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count     20557
    unique      370
    top        N.A.
    freq      12485
    Name: OCCP10, dtype: object
OCCP12: Occupation recode for data collected in 2012 or later based on 2010 OCC codes
    ['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at:']
    bbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count     20557
    unique      357
    top        N.A.
    freq      11763
    Name: OCCP12, dtype: object
PAOC: Presence and age of own children
    b: N/A (male/female under 16 years old/GQ)
    1: Females with own children under 6 years only
    2: Females with own children 6 to 17 years only
    count    13083.000000
    mean         3.616372
    std          0.875946
    min          1.000000
    25%          4.000000
    50%          4.000000
    75%          4.000000
    max          4.000000
    Name: PAOC, dtype: float64
PERNP: Total person's earnings
    ['Note: Use values from ADJINC to adjust PERNP to constant dollars.']
    bbbbbbb: N/A (less than 15 years old)
    0000000: No earnings
    -010000: Loss of $10000 or more (Rounded & bottom-coded components)
    count      26399.000000
    mean       44739.192053
    std        77239.316348
    min        -9100.000000
    25%            0.000000
    50%        20000.000000
    75%        62000.000000
    max      1360000.000000
    Name: PERNP, dtype: float64
PINCP: Total person's income (signed)
    ['Note: Use values from ADJINC to adjust PINCP to constant dollars.']
    bbbbbbb: N/A (less than 15 years old)
    0000000: None
    -019999: Loss of $19999 or more (Rounded & bottom-coded components)
    count      26658.000000
    mean       53262.965339
    std        82561.741382
    min       -13600.000000
    25%         7200.000000
    50%        30000.000000
    75%        70000.000000
    max      1471000.000000
    Name: PINCP, dtype: float64
POBP05: Place of birth (Recode) for data collected prior to 2012
    001: Alabama/AL
    002: Alaska/AK
    004: Arizona/AZ
    count    30559.000000
    mean        29.560948
    std         80.011666
    min         -9.000000
    25%         -9.000000
    50%         11.000000
    75%         31.000000
    max        554.000000
    Name: POBP05, dtype: float64
POBP12: Place of birth (Recode) for data collected in 2012 or later
    001: Alabama/AL
    002: Alaska/AK
    004: Arizona/AZ
    count    30559.000000
    mean        19.925554
    std         72.939454
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%         12.000000
    max        515.000000
    Name: POBP12, dtype: float64
POVPIP: Income-to-poverty ratio recode
    bbb: N/A
    000..500: Below 501 percent
    501: 501 percent or more
    count    28378.000000
    mean       331.494045
    std        181.116198
    min          0.000000
    25%        158.000000
    50%        398.000000
    75%        501.000000
    max        501.000000
    Name: POVPIP, dtype: float64
POWPUMA00: Place of work PUMA based on Census 2000 definition for data collected prior to 2012
    bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work)
    00001: Did not work in the United States or in Puerto Rico
    00100..08200: Assigned Place of work PUMA.  Use with POWSP05.
    count    15327.000000
    mean       144.907810
    std        345.593937
    min         -9.000000
    25%         -9.000000
    50%        100.000000
    75%        100.000000
    max       6890.000000
    Name: POWPUMA00, dtype: float64
POWPUMA10: Place of work PUMA based on 2010 Census definition for data collected in 2012 or later
    bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work)
    00001: Did not work in the United States or in Puerto Rico
    00100..70100: Assigned Place of work PUMA.  Use with POWSP12.
    count    15327.000000
    mean      1563.770470
    std       8888.575098
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%        100.000000
    max      59300.000000
    Name: POWPUMA10, dtype: float64
POWSP05: Place of work for data collected prior to 2012 - State or foreign country recode
    bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work)
    001: Alabama/AL
    002: Alaska/AK
    count    15327.000000
    mean         6.471978
    std         18.985725
    min         -9.000000
    25%         -9.000000
    50%         11.000000
    75%         11.000000
    max        555.000000
    Name: POWSP05, dtype: float64
POWSP12: Place of work for data collected in 2012 or later - State or foreign country recode
    bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work)
    001: Alabama/AL
    002: Alaska/AK
    count    15327.000000
    mean         2.357539
    std         17.856144
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%         11.000000
    max        555.000000
    Name: POWSP12, dtype: float64
PRIVCOV: Private health insurance coverage recode
    1: With private health insurance coverage
    2: Without private health insurance coverage
    count    30559.000000
    mean         1.283877
    std          0.450885
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          2.000000
    Name: PRIVCOV, dtype: float64
PUBCOV: Public health coverage recode
    1: With public health coverage
    2: Without public health coverage
    count    30559.000000
    mean         1.651461
    std          0.476515
    min          1.000000
    25%          1.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: PUBCOV, dtype: float64
QTRBIR: Quarter of birth
    1: January through March
    2: April through June
    3: July through September
    count    30559.000000
    mean         2.520665
    std          1.111741
    min          1.000000
    25%          2.000000
    50%          3.000000
    75%          4.000000
    max          4.000000
    Name: QTRBIR, dtype: float64
RAC1P: Recoded detailed race code
    1: White alone
    2: Black or African American alone
    3: American Indian alone
    count    30559.000000
    mean         2.025132
    std          1.739391
    min          1.000000
    25%          1.000000
    50%          2.000000
    75%          2.000000
    max          9.000000
    Name: RAC1P, dtype: float64
RAC2P05: Recoded detailed race code for data collected prior to 2012
    01: White alone
    02: Black or African American alone
    03: Apache alone
    count    30559.000000
    mean        -0.339834
    std         13.838677
    min         -9.000000
    25%         -9.000000
    50%          1.000000
    75%          2.000000
    max         67.000000
    Name: RAC2P05, dtype: float64
RAC2P12: Recoded detailed race code for data collected in 2012 or later
    01: White alone
    02: Black or African American alone
    03: Apache alone
    count    30559.000000
    mean        -2.417291
    std         13.208816
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%          1.000000
    max         68.000000
    Name: RAC2P12, dtype: float64
RAC3P05: Recoded detailed race code for data collected prior to 2012
    01: Some other race alone
    02: Other Pacific Islander alone
    03: Samoan alone
    count    30559.000000
    mean        27.004778
    std         33.030433
    min         -9.000000
    25%         -9.000000
    50%         44.000000
    75%         68.000000
    max         72.000000
    Name: RAC3P05, dtype: float64
RAC3P12: Recoded detailed race code for data collected in 2012 or later
    001: White alone
    002: Black or African American alone
    003: American Indian and Alaska Native alone
    count    30559.000000
    mean        -4.015347
    std          7.034421
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%          1.000000
    max         95.000000
    Name: RAC3P12, dtype: float64
RACAIAN: American Indian and Alaska Native recode (American Indian and Alaska Native alone or in combination with one or more other races)
    0: No
    1: Yes
    count    30559.000000
    mean         0.009490
    std          0.096954
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: RACAIAN, dtype: float64
RACASN: Asian recode (Asian alone or in combination with one or more other races)
    0: No
    1: Yes
    count    30559.000000
    mean         0.044308
    std          0.205781
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: RACASN, dtype: float64
RACBLK: Black or African American recode (Black alone or in combination with one or more other races)
    0: No
    1: Yes
    count    30559.000000
    mean         0.497464
    std          0.500002
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          1.000000
    max          1.000000
    Name: RACBLK, dtype: float64
RACNHPI: Native Hawaiian and Other Pacific Islander recode (Native Hawaiian and Other Pacific Islander alone or in combination with one or more other races)
    0: No
    1: Yes
    count    30559.000000
    mean         0.001080
    std          0.032844
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: RACNHPI, dtype: float64
RACNUM: Number of major race groups represented
    1..6: Race groups
    count    30559.000000
    mean         1.027291
    std          0.185123
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          4.000000
    Name: RACNUM, dtype: float64
RACSOR: Some other race recode (Some other race alone or in combination with one or more other races)
    0: No
    1: Yes
    count    30559.000000
    mean         0.027750
    std          0.164257
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: RACSOR, dtype: float64
RACWHT: White recode (White alone or in combination with one or more other races)
    0: No
    1: Yes
    count    30559.000000
    mean         0.447200
    std          0.497213
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          1.000000
    max          1.000000
    Name: RACWHT, dtype: float64
RC: Related child
    0: No (includes GQ)
    1: Yes
    count    30559.000000
    mean         0.149939
    std          0.357018
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: RC, dtype: float64
SCIENGP: Field of degree science and engineering flag - NSF definition
    b: N/A (less than a bachelor's degree)
    1: Yes
    2: No
    count    12871.000000
    mean         1.479605
    std          0.499603
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          2.000000
    Name: SCIENGP, dtype: float64
SCIENGRLP: Field of degree science and engineering related flag - NSF definition
    b: N/A (less than a bachelor's degree)
    1: Yes
    2: No
    count    12871.000000
    mean         1.951674
    std          0.214462
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: SCIENGRLP, dtype: float64
SFN: Subfamily number
    b: N/A (GQ/not in a subfamily)
    1: In subfamily 1
    2: In subfamily 2
    count    923
    mean       1
    std        0
    min        1
    25%        1
    50%        1
    75%        1
    max        1
    Name: SFN, dtype: float64
SFR: Subfamily relationship
    b: N/A (GQ/not in a subfamily)
    1: Husband/wife no children
    2: Husband/wife with children
    count    923.000000
    mean       3.963164
    std        1.375627
    min        1.000000
    25%        3.000000
    50%        5.000000
    75%        5.000000
    max        6.000000
    Name: SFR, dtype: float64
SOCP00: SOC Occupation code for data collected in 2009 based on 2000 SOC codes
    bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count      20557
    unique       314
    top       N.A.//
    freq       16866
    Name: SOCP00, dtype: object
SOCP10: SOC Occupation code for data collected in 2010 and 2011 based on 2010 SOC codes
    bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count      20557
    unique       370
    top       N.A.//
    freq       12485
    Name: SOCP10, dtype: object
SOCP12: SOC Occupation recode for data collected in 2012 or later based on 2010 SOC codes
    ['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at:']
    bbbbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked)
    count      20557
    unique       357
    top       N.A.//
    freq       11763
    Name: SOCP12, dtype: object
VPS: Veteran period of service
    bb: N/A (less than 17 years old, no active duty) War Times:
    01: Gulf War: 9/2001 or later
    02: Gulf War: 9/2001 or later and Gulf War: 8/1990 - 8/2001
    count    1855.000000
    mean        6.997844
    std         4.099096
    min         1.000000
    25%         4.000000
    50%         6.000000
    75%        11.000000
    max        15.000000
    Name: VPS, dtype: float64
WAOB: World area of birth ****
    1: US state (POB = 001-059)
    2: PR and US Island Areas (POB = 060-099)
    3: Latin America (POB = 303,310-399)
    count    30559.000000
    mean         1.457999
    std          1.235133
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          8.000000
    Name: WAOB, dtype: float64
num columns described = 145

insp: Column not in data dictionary.
    count    6561.000000
    mean      999.282731
    std      1085.174484
    min         0.000000
    25%       370.000000
    50%       790.000000
    75%      1200.000000
    max      8600.000000
    Name: insp, dtype: float64
RT: Record Type
    H: Housing Record or Group Quarters Unit
    count     17500
    unique        1
    top           H
    freq      17500
    Name: RT, dtype: object
SERIALNO: Housing unit/GQ person serial number
    2009000000001..2013999999999: Unique identifier
    count    1.750000e+04
    mean     2.011068e+12
    std      1.401911e+09
    min      2.009000e+12
    25%      2.010000e+12
    50%      2.011001e+12
    75%      2.012001e+12
    max      2.013001e+12
    Name: SERIALNO, dtype: float64
DIVISION: Division code
    0: Puerto Rico
    1: New England (Northeast region)
    2: Middle Atlantic (Northeast region)
    count    17500
    mean         5
    std          0
    min          5
    25%          5
    50%          5
    75%          5
    max          5
    Name: DIVISION, dtype: float64
PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10.
    ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population.  Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See to identify PUMA changes between the two vintages.']
    00100..08200: Public use microdata area codes
    77777: Combination of 01801, 01802, and 01905 in Louisiana
    -0009: Code classification is Not Applicable because data collected in 2012 or later
    count    17500.000000
    mean        56.427371
    std         55.291036
    min         -9.000000
    25%         -9.000000
    50%        101.000000
    75%        103.000000
    max        105.000000
    Name: PUMA00, dtype: float64
PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data collected in 2012 or later. Use in combination with PUMA00.
    ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population.  Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See to identify PUMA changes between the two vintages.']
    00100..70301: Public use microdata area codes
    -0009: Code classification is Not Applicable because data collected prior to 2012
    count    17500.000000
    mean        37.764171
    std         55.358495
    min         -9.000000
    25%         -9.000000
    50%         -9.000000
    75%        103.000000
    max        105.000000
    Name: PUMA10, dtype: float64
REGION: Region code
    1: Northeast
    2: Midwest
    3: South
    count    17500
    mean         3
    std          0
    min          3
    25%          3
    50%          3
    75%          3
    max          3
    Name: REGION, dtype: float64
ST: State Code
    01: Alabama/AL
    02: Alaska/AK
    04: Arizona/AZ
    count    17500
    mean        11
    std          0
    min         11
    25%         11
    50%         11
    75%         11
    max         11
    Name: ST, dtype: float64
ADJHSG: Adjustment factor for housing dollar amounts (6 implied decimal places)
    ['Note: The values of ADJHSG inflation-adjusts reported housing costs to 2013 dollars and applies to variables CONP, ELEP, FULP, GASP, GRNTP, INSP, MHP, MRGP, SMOCP, RNTP, SMP, and WATP in the housing record. ADJHSG does not apply to AGS or TAXP because they are categorical variables that should not be inflation-adjusted.']
    1086032: 2009 factor
    1068395: 2010 factor
    1035725: 2011 factor
    count      17500.000000
    mean     1039364.231657
    std        31877.254257
    min      1000000.000000
    25%      1014531.000000
    50%      1035725.000000
    75%      1068395.000000
    max      1086032.000000
    Name: ADJHSG, dtype: float64
ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places)
    ['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars.  ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.']
    1085467: 2009 factor (0.999480 * 1.08603175)
    1076540: 2010 factor (1.007624 * 1.06839475)
    1054614: 2011 factor (1.018237 * 1.03572510)
    count      17500.000000
    mean     1048478.770229
    std        29598.269890
    min      1007549.000000
    25%      1024887.000000
    50%      1054614.000000
    75%      1076540.000000
    max      1085467.000000
    Name: ADJINC, dtype: float64
WGTP: Housing Weight
    0000: Group Quarter placeholder record
    00001..09999: Integer weight of housing unit
    count    17500.000000
    mean        17.047257
    std         13.878535
    min          0.000000
    25%         10.000000
    50%         14.000000
    75%         22.000000
    max        172.000000
    Name: WGTP, dtype: float64
NP: Number of person records following this housing record
    00: Vacant unit
    01: One person record (one person in household or any person in group quarters)
    02..20: Number of person records (number of persons in household)
    count    17500.000000
    mean         1.746229
    std          1.291371
    min          0.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max         13.000000
    Name: NP, dtype: float64
TYPE: Type of unit
    1: Housing unit
    2: Institutional group quarters
    3: Noninstitutional group quarters
    count    17500.000000
    mean         1.268514
    std          0.655686
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          3.000000
    Name: TYPE, dtype: float64
ACR: Lot size
    b: N/A (GQ/not a one-family house or mobile home)
    1: House on less than one acre
    2: House on one to less than ten acres
    count    6388.000000
    mean        1.028961
    std         0.182034
    min         1.000000
    25%         1.000000
    50%         1.000000
    75%         1.000000
    max         3.000000
    Name: ACR, dtype: float64
AGS: Sales of Agriculture Products (Yearly sales)
    ['Note: No adjustment factor is applied to AGS.']
    b: N/A (GQ/vacant/not a one-family house or mobile home/less than 1 acre)
    1: None
    2: $    1 - $  999
    count    164.000000
    mean       1.201220
    std        0.934544
    min        1.000000
    25%        1.000000
    50%        1.000000
    75%        1.000000
    max        6.000000
    Name: AGS, dtype: float64
BATH: Bathtub or shower
    b: N/A (GQ)
    1: Yes
    2: No
    count    14844.000000
    mean         1.005322
    std          0.072760
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: BATH, dtype: float64
BDSP: Number of bedrooms
    bb    : N/A (GQ)
    00..99: 0 to 99 bedrooms (Top-coded)
    count    14844.000000
    mean         2.171989
    std          1.452519
    min          0.000000
    25%          1.000000
    50%          2.000000
    75%          3.000000
    max         14.000000
    Name: BDSP, dtype: float64
BLD: Units in structure
    bb: N/A (GQ)
    01: Mobile home or trailer
    02: One-family house detached
    count    14844.000000
    mean         5.365131
    std          2.671220
    min          2.000000
    25%          3.000000
    50%          5.000000
    75%          8.000000
    max          9.000000
    Name: BLD, dtype: float64
BUS: Business or medical office on property
    b: N/A (GQ/not a one-family house or mobile home)
    1: Yes
    2: No
    count    6388.000000
    mean        1.986381
    std         0.115913
    min         1.000000
    25%         2.000000
    50%         2.000000
    75%         2.000000
    max         2.000000
    Name: BUS, dtype: float64
CONP: Condo fee (monthly amount)
    ['Note: Use values from ADJHSG to adjust CONP to constant dollars.']
    bbbb: N/A (GQ/vacant/not owned or being bought)
    0000: Not condo
    0001..9999: $1 - $9999 (Rounded and top-coded)
    count    14844.000000
    mean        51.061035
    std        175.491301
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max       1700.000000
    Name: CONP, dtype: float64
ELEP: Electricity (monthly cost)
    ['Note: Use values from ADJHSG to adjust ELEP values 3 and over to constant dollars.']
    bbb: N/A (GQ/vacant)
    001: Included in rent or in condo fee
    002: No charge or electricity not used
    count    13737.000000
    mean        92.849967
    std         94.873654
    min          1.000000
    25%         30.000000
    50%         70.000000
    75%        130.000000
    max        570.000000
    Name: ELEP, dtype: float64
FS: Yearly food stamp/Supplemental Nutrition Assistance Program recipiency
    b: N/A (vacant)
    1: Yes
    2: No
    count    16393.000000
    mean         1.874093
    std          0.331755
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max          2.000000
    Name: FS, dtype: float64
FULP: Fuel cost(yearly cost for fuels other than gas and electricity)
    ['Note: Use values from ADJHSG to adjust FULP values 3 and over to constant dollars.']
    bbbb: N/A (GQ/vacant)
    0001: Included in rent or in condo fee
    0002: No charge or these fuels not used
    count    13737.000000
    mean        45.400306
    std        311.391053
    min          1.000000
    25%          2.000000
    50%          2.000000
    75%          2.000000
    max       5200.000000
    Name: FULP, dtype: float64
GASP: Gas (monthly cost)
    ['Note: Use values from ADJHSG to adjust GASP values 4 and over to constant dollars.']
    bbb: N/A (GQ/vacant)
    001: Included in rent or in condo fee
    002: Included in electricity payment
    count    13737.000000
    mean        56.089758
    std         88.354993
    min          1.000000
    25%          3.000000
    50%         10.000000
    75%         80.000000
    max        580.000000
    Name: GASP, dtype: float64
HFL: House heating fuel
    b: N/A (GQ/vacant)
    1: Utility gas
    2: Bottled, tank, or LP gas
    count    13737.000000
    mean         1.878867
    std          1.306775
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          3.000000
    max          9.000000
    Name: HFL, dtype: float64
MHP: Mobile home costs (yearly amount)
    ['Note: Use values from ADJHSG to adjust MHP to constant dollars.']
    bbbbb: N/A (GQ/vacant/not owned or being bought/ not mobile home)
    00000: None
    00001..99999: $1 to $99999 (Rounded and top-coded)
    count     0
    mean    NaN
    std     NaN
    min     NaN
    25%     NaN
    50%     NaN
    75%     NaN
    max     NaN
    Name: MHP, dtype: float64
MRGI: First mortgage payment includes fire/hazard/flood insurance
    b: N/A (GQ/vacant/not owned or being bought/not mortgaged)
    1: Yes, insurance included in payment
    2: No, insurance paid separately or no insurance
    count    4971.000000
    mean        1.446590
    std         0.497189
    min         1.000000
    25%         1.000000
    50%         1.000000
    75%         2.000000
    max         2.000000
    Name: MRGI, dtype: float64
MRGP: First mortgage payment (monthly amount)
    ['Note: Use values from ADJHSG to adjust MRGP to constant dollars.']
    bbbbb: N/A (GQ/vacant/not owned or being bought/not mortgaged)
    00001..99999: $1 to $99999 (Rounded and top-coded)
    count    4971.000000
    mean     1936.767250
    std      1244.418262
    min        10.000000
    25%      1000.000000
    50%      1700.000000
    75%      2500.000000
    max      8000.000000
    Name: MRGP, dtype: float64
MRGT: First mortgage payment includes real estate taxes
    b: N/A (GQ/vacant/not owned or being bought/not mortgaged)
    1: Yes, taxes included in payment
    2: No, taxes paid separately or taxes not required
    count    4971.000000
    mean        1.281633
    std         0.449841
    min         1.000000
    25%         1.000000
    50%         1.000000
    75%         2.000000
    max         2.000000
    Name: MRGT, dtype: float64
MRGX: First mortgage status
    b: N/A (GQ/vacant/not owned or being bought)
    1: Mortgage, deed of trust, or similar debt
    2: Contract to purchase
    count    6561.000000
    mean        1.490626
    std         0.857150
    min         1.000000
    25%         1.000000
    50%         1.000000
    75%         1.000000
    max         3.000000
    Name: MRGX, dtype: float64
REFR: Refrigerator
    b: N/A (GQ)
    1: Yes
    2: No
    count    14844.000000
    mean         1.007949
    std          0.088807
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: REFR, dtype: float64
RMSP: Number of Rooms
    bb: N/A (GQ)
    00..99: Rooms (Top-coded)
    count    14844.000000
    mean         5.139854
    std          2.895583
    min          1.000000
    25%          3.000000
    50%          4.000000
    75%          7.000000
    max         24.000000
    Name: RMSP, dtype: float64
RNTM: Meals included in rent
    b: N/A (GQ/not a rental unit/occupied without rent payment)
    1: Yes
    2: No
    count    7373.000000
    mean        1.987658
    std         0.110416
    min         1.000000
    25%         2.000000
    50%         2.000000
    75%         2.000000
    max         2.000000
    Name: RNTM, dtype: float64
RNTP: Monthly rent
    ['Note: Use values from ADJHSG to adjust RNTP to constant dollars.']
    bbbbb: N/A (GQ/not a rental unit/occupied without rent payment)
    00001..99999: $1 to $99999 (Rounded and top-coded)
    count    7373.000000
    mean     1246.782856
    std       769.088231
    min         4.000000
    25%       730.000000
    50%      1100.000000
    75%      1700.000000
    max      3900.000000
    Name: RNTP, dtype: float64
RWAT: Hot and cold running water
    b: N/A (GQ)
    1: Yes
    2: No
    count    14844.000000
    mean         1.007343
    std          0.085379
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: RWAT, dtype: float64
RWATPR: Running water
    b: N/A (GQ)
    1: Yes
    2: No
    count    14844
    mean         9
    std          0
    min          9
    25%          9
    50%          9
    75%          9
    max          9
    Name: RWATPR, dtype: float64
SINK: Sink with a faucet
    b: N/A (GQ)
    1: Yes
    2: No
    count    14844.000000
    mean         1.005457
    std          0.073670
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: SINK, dtype: float64
SMP: Total payment on all second and junior mortgages and home equity loans (monthly amount)
    ['Note: Use ADJHSG to adjust SMP to constant dollars.']
    bbbbb: N/A (GQ/vacant/not owned or being bought/ no second or junior mortgages or home equity loans)
    00001..99999: $1 to $99999 (Rounded and top-coded)
    count    1228.000000
    mean      506.485342
    std       569.513130
    min         4.000000
    25%       170.000000
    50%       350.000000
    75%       600.000000
    max      4100.000000
    Name: SMP, dtype: float64
STOV: Stove or range
    b: N/A (GQ)
    1: Yes
    2: No
    count    14844.000000
    mean         1.010105
    std          0.100018
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: STOV, dtype: float64
TEL: Telephone
    ['NOTE: Problems in the collection of data on the availability of telephone service (TEL) in 2012 led to suppressing this variable in six PUMAs in Georgia. This only affects 2012 vintage data. See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using TEL.']
    b: N/A (GQ/vacant)
    1: Yes
    2: No
    count    13737.000000
    mean         1.029701
    std          0.169767
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: TEL, dtype: float64
TEN: Tenure
    b: N/A (GQ/vacant)
    1: Owned with mortgage or loan (include home equity loans)
    2: Owned free and clear
    count    13737.000000
    mean         2.174128
    std          0.945958
    min          1.000000
    25%          1.000000
    50%          3.000000
    75%          3.000000
    max          4.000000
    Name: TEN, dtype: float64
TOIL: Flush toilet
    b: N/A (GQ)
    1: Yes
    2: No
    count    14844.000000
    mean         1.005591
    std          0.074569
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: TOIL, dtype: float64
VACS: Vacancy status
    b: N/A (GQ/occupied)
    1: For rent
    2: Rented, not occupied
    count    1107.000000
    mean        4.214995
    std         2.565742
    min         1.000000
    25%         1.000000
    50%         4.000000
    75%         7.000000
    max         7.000000
    Name: VACS, dtype: float64
VALP: Property value
    bbbbbbb: N/A (GQ/vacant units, except "for-sale-only" and "sold, not occupied"/not owned or being bought)
    0000000: $0 (applies to 2009 and 2010 only)
    0000001..9999999: $1 to $9999999 (Rounded and top-coded)
    count       6741.000000
    mean      576821.155615
    std       578742.887940
    min          180.000000
    25%       290000.000000
    50%       410000.000000
    75%       700000.000000
    max      5303000.000000
    Name: VALP, dtype: float64
VEH: Vehicles (1 ton or less) available
    b: N/A (GQ/vacant)
    0: No vehicles
    1: 1 vehicle
    count    13737.000000
    mean         0.925311
    std          0.869523
    min          0.000000
    25%          0.000000
    50%          1.000000
    75%          1.000000
    max          6.000000
    Name: VEH, dtype: float64
WATP: Water (yearly cost)
    ['Note: Use values from ADJHSG to adjust WATP values 3 and over to constant dollars.']
    bbbb: N/A (GQ/vacant)
    0001: Included in rent or in condo fee
    0002: No charge
    count    13737.000000
    mean       239.380724
    std        412.715268
    min          1.000000
    25%          1.000000
    50%          2.000000
    75%        390.000000
    max       3900.000000
    Name: WATP, dtype: float64
YBL: When structure first built
    bb: N/A (GQ)
    01: 1939 or earlier
    02: 1940 to 1949
    count    14844.000000
    mean         3.239289
    std          2.768117
    min          1.000000
    25%          1.000000
    50%          2.000000
    75%          4.000000
    max         16.000000
    Name: YBL, dtype: float64
FES: Family type and employment status
    b: N/A (GQ/vacant/not a family)
    1: Married-couple family: Husband and wife in LF
    2: Married-couple family: Husband in labor force, wife not in LF
    count    5929.000000
    mean        3.998988
    std         2.787092
    min         1.000000
    25%         1.000000
    50%         4.000000
    75%         7.000000
    max         8.000000
    Name: FES, dtype: float64
FINCP: Family income (past 12 months)
    ['Note: Use values from ADJINC to adjust FINCP to constant dollars.']
    bbbbbbbb: N/A (GQ/vacant)
    00000000: No family income
    -0059999: Loss of -$59,999 or more
    count       5953.000000
    mean      130596.028725
    std       154445.776061
    min            0.000000
    25%        36000.000000
    50%        84500.000000
    75%       166000.000000
    max      2087000.000000
    Name: FINCP, dtype: float64
FPARC: Family presence and age of related children
    b: N/A (GQ/vacant/not a family)
    1: With related children under 5 years only
    2: With related children 5 to 17 years only
    count    5953.000000
    mean        3.076432
    std         1.124643
    min         1.000000
    25%         2.000000
    50%         4.000000
    75%         4.000000
    max         4.000000
    Name: FPARC, dtype: float64
GRNTP: Gross rent (monthly amount)
    ['Note: Use values from ADJHSG to adjust GRNTP to constant dollars.']
    bbbbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment)
    00001..99999: $1 - $99999 (Components are rounded)
    count    6989.000000
    mean     1349.524109
    std       791.201141
    min         4.000000
    25%       819.000000
    50%      1200.000000
    75%      1800.000000
    max      4510.000000
    Name: GRNTP, dtype: float64
GRPIP: Gross rent as a percentage of household income past 12 months
    bbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment/no household income)
    001..100: 1% to 100%
    101: 101% or more
    count    6805.000000
    mean       39.897575
    std        28.575415
    min         1.000000
    25%        20.000000
    50%        29.000000
    75%        51.000000
    max       101.000000
    Name: GRPIP, dtype: float64
HHL: Household language
    b: N/A (GQ/vacant)
    1: English only
    2: Spanish
    count    13737.000000
    mean         1.336245
    std          0.850583
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          5.000000
    Name: HHL, dtype: float64
HHT: Household/family type
    b: N/A (GQ/vacant)
    1: Married couple household
    2: Other family household:Male householder, no wife present
    count    13737.000000
    mean         3.805198
    std          2.036160
    min          1.000000
    25%          2.000000
    50%          4.000000
    75%          6.000000
    max          7.000000
    Name: HHT, dtype: float64
HINCP: Household income (past 12 months)
    ['Note: Use values from ADJINC to adjust HINCP to constant dollars.']
    bbbbbbbb: N/A(GQ/vacant)
    00000000: No household income
    -0059999: Loss of -$59,999 or more
    count      13737.000000
    mean      102051.604353
    std       125888.164393
    min       -13600.000000
    25%        29200.000000
    50%        67000.000000
    75%       128000.000000
    max      2087000.000000
    Name: HINCP, dtype: float64
HUGCL: Household with grandparent living with grandchildren
    b: N/A (GQ/vacant)
    0: Household without grandparent living with grandchildren
    1: Household with grandparent living with grandchildren
    count    13737.000000
    mean         0.033559
    std          0.180098
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: HUGCL, dtype: float64
HUPAC: HH presence and age of children
    b: N/A (GQ/vacant)
    1: With children under 6 years only
    2: With children 6 to 17 years only
    count    13737.000000
    mean         3.591541
    std          0.888889
    min          1.000000
    25%          4.000000
    50%          4.000000
    75%          4.000000
    max          4.000000
    Name: HUPAC, dtype: float64
HUPAOC: HH presence and age of own children
    b: N/A (GQ/vacant)
    1: Presence of own children under 6 years only
    2: Presence of own children 6 to 17 years only
    count    13737.000000
    mean         3.652617
    std          0.836272
    min          1.000000
    25%          4.000000
    50%          4.000000
    75%          4.000000
    max          4.000000
    Name: HUPAOC, dtype: float64
HUPARC: HH presence and age of related children
    b: N/A (GQ/vacant)
    1: Presence of related children under 6 years only
    2: Presence of related children 6 to 17 years only
    count    13737.000000
    mean         3.594599
    std          0.886682
    min          1.000000
    25%          4.000000
    50%          4.000000
    75%          4.000000
    max          4.000000
    Name: HUPARC, dtype: float64
KIT: Complete kitchen facilities
    b: N/A (GQ)
    1: Yes, has stove or range, refrigerator, and sink with a faucet
    2: No
    count    14844.000000
    mean         1.011587
    std          0.107022
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: KIT, dtype: float64
LNGI: Limited English speaking households
    b: N/A (GQ/vacant)
    1: At least one person in the household 14 and over speaks English only or speaks English 'very well'
    2: No one in the household 14 and over speaks English only or speaks English 'very well'
    count    13737.000000
    mean         1.023368
    std          0.151073
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: LNGI, dtype: float64
MULTG: Multigenerational Household
    b: N/A (GQ/Vacant/NP=0)
    1: No, not a multigenerational household
    2: Yes, is a multigenerational household
    count    13737.000000
    mean         1.032977
    std          0.178582
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: MULTG, dtype: float64
MV: When moved into this house or apartment
    b: N/A (GQ/vacant)
    1: 12 months or less
    2: 13 to 23 months
    count    13737.000000
    mean         3.800466
    std          1.936343
    min          1.000000
    25%          2.000000
    50%          4.000000
    75%          5.000000
    max          7.000000
    Name: MV, dtype: float64
NOC: Number of own children in household (unweighted)
    bb: N/A(GQ/vacant)
    00: No own children
    01..19: Number of own children in household
    count    13737.000000
    mean         0.277572
    std          0.727938
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          8.000000
    Name: NOC, dtype: float64
NPF: Number of persons in family (unweighted)
    bb: N/A (GQ/vacant/non-family household)
    02..20: Number of persons in family
    count    5953.000000
    mean        2.893331
    std         1.224969
    min         2.000000
    25%         2.000000
    50%         2.000000
    75%         3.000000
    max        12.000000
    Name: NPF, dtype: float64
NPP: Grandparent headed household with no parent present
    b: N/A (GQ/vacant)
    0: Not a grandparent headed household with no parent present
    1: Grandparent headed household with no parent present
    count    13737.000000
    mean         0.004586
    std          0.067568
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: NPP, dtype: float64
NR: Presence of nonrelative in household
    b: N/A (GQ/vacant)
    0: None
    1: 1 or more nonrelatives
    count    13737.000000
    mean         0.159132
    std          0.365813
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: NR, dtype: float64
NRC: Number of related children in household (unweighted)
    bb: N/A (GQ/vacant)
    00: No related children
    01..19: Number of related children in household
    count    13737.000000
    mean         0.333552
    std          0.806915
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max         10.000000
    Name: NRC, dtype: float64
OCPIP: Selected monthly owner costs as a percentage of household income during the past 12 months
    bbb: N/A (GQ/vacant/not owned or being bought/ no household income)
    001..100: 1% to 100%
    101: 101% or more
    count    6500.000000
    mean       26.966923
    std        23.259257
    min         1.000000
    25%        12.000000
    50%        20.000000
    75%        32.000000
    max       101.000000
    Name: OCPIP, dtype: float64
PARTNER: Unmarried partner household
    b: N/A (GQ/vacant)
    0: No unmarried partner in household
    1: Male householder, male partner
    count    13737.000000
    mean         0.188032
    std          0.751881
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          4.000000
    Name: PARTNER, dtype: float64
PLM: Complete plumbing facilities
    b: N/A (GQ)
    1: Yes, has hot and cold running water, a flush toilet, and a bathtub or shower
    2: No
    count    14844.000000
    mean         1.008690
    std          0.092819
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          1.000000
    max          2.000000
    Name: PLM, dtype: float64
PSF: Presence of subfamilies in Household
    b: N/A (GQ/vacant)
    0: No subfamilies
    1: 1 or more subfamilies
    count    13737.000000
    mean         0.026789
    std          0.161472
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: PSF, dtype: float64
R18: Presence of persons under 18 years in household (unweighted)
    b: N/A (GQ/vacant)
    0: No person under 18 in household
    1: 1 or more persons under 18 in household
    count    13737.000000
    mean         0.193638
    std          0.395163
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          1.000000
    Name: R18, dtype: float64
R60: Presence of persons 60 years and over in household (unweighted)
    b: N/A (GQ/vacant)
    0: No person 60 and over
    1: 1 person 60 and over
    count    13737.000000
    mean         0.415447
    std          0.647227
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          1.000000
    max          2.000000
    Name: R60, dtype: float64
R65: Presence of persons 65 years and over in household (unweighted)
    b: N/A (GQ/vacant)
    0: No person 65 and over
    1: 1 person 65 and over
    count    13737.000000
    mean         0.297809
    std          0.568071
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          0.000000
    max          2.000000
    Name: R65, dtype: float64
RESMODE: Response mode
    b: N/A (GQ)
    1: Mail
    2: CATI/CAPI
    count    14844.000000
    mean         1.523579
    std          0.657194
    min          1.000000
    25%          1.000000
    50%          1.000000
    75%          2.000000
    max          3.000000
    Name: RESMODE, dtype: float64
SMOCP: Selected monthly owner costs
    ['Note: Use values from ADJHSG to adjust SMOCP to constant dollars.']
    bbbbb: N/A (GQ/vacant/not owned or being bought)
    00000: None
    00001..99999: $1 - $99999 (Components are rounded)
    count     6561.000000
    mean      2099.427526
    std       1528.415666
    min          0.000000
    25%        956.000000
    50%       1821.000000
    75%       2810.000000
    max      12113.000000
    Name: SMOCP, dtype: float64
SMX: Second or junior mortgage or home equity loan status
    b: N/A (GQ/vacant/not owned or being bought)
    1: Yes, a second mortgage
    2: Yes, a home equity loan
    count    4971.000000
    mean        2.709113
    std         0.597341
    min         1.000000
    25%         3.000000
    50%         3.000000
    75%         3.000000
    max         4.000000
    Name: SMX, dtype: float64
SRNT: Specified rent unit
    b: N/A
    0: Not specified rent unit
    1: Specified rent unit
    count    14844.000000
    mean         0.509229
    std          0.499932
    min          0.000000
    25%          0.000000
    50%          1.000000
    75%          1.000000
    max          1.000000
    Name: SRNT, dtype: float64
SVAL: Specified value owner unit
    b: N/A
    0: Not specified value owner unit
    1: Specified value owner unit
    count    14844.000000
    mean         0.328887
    std          0.469825
    min          0.000000
    25%          0.000000
    50%          0.000000
    75%          1.000000
    max          1.000000
    Name: SVAL, dtype: float64
TAXP: Property taxes (yearly amount)
    ['Note: No adjustment factor is applied to TAXP.']
    bb: N/A (GQ/vacant/not owned or being bought)
    01: None
    02: $   1 - $  49
    count    6561.000000
    mean       36.513032
    std        19.481382
    min         1.000000
    25%        22.000000
    50%        33.000000
    75%        53.000000
    max        68.000000
    Name: TAXP, dtype: float64
WIF: Workers in family during the past 12 months
    b: N/A (GQ/vacant/non-family household)
    0: No workers
    1: 1 worker
    count    5953.000000
    mean        1.432555
    std         0.824398
    min         0.000000
    25%         1.000000
    50%         2.000000
    75%         2.000000
    max         3.000000
    Name: WIF, dtype: float64
WKEXREL: Work experience of householder and spouse
    bb: N/A (GQ/vacant/not a family)
    01: Householder and spouse worked FT
    02: Householder worked FT; spouse worked < FT
    count    5953.000000
    mean        7.787166
    std         5.420605
    min         1.000000
    25%         2.000000
    50%         8.000000
    75%        13.000000
    max        15.000000
    Name: WKEXREL, dtype: float64
WORKSTAT: Work status of householder or spouse in family households
    bb: N/A (GQ/not a family household)
    01: Husband and wife both in labor force, both employed or in Armed Forces
    02: Husband and wife both in labor force, husband employed or in Armed Forces, wife unemployed
    count    5929.000000
    mean        7.442570
    std         5.594645
    min         1.000000
    25%         1.000000
    50%         9.000000
    75%        13.000000
    max        15.000000
    Name: WORKSTAT, dtype: float64
num columns described = 84

PUMS estimates for user verification

In [11]:
# Load the PUMS estimates published for user verification, then keep only
# the rows whose 'state' column is 'District of Columbia'.
print("`dfe`: Estimates for user verification filtered for 'District of Columbia'.")
dfe = pd.read_csv(path_ecsv)
# Boolean row mask: True where the row belongs to the District of Columbia.
tfmask_dc = dfe['state'].eq('District of Columbia')
dfe_dc = dfe[tfmask_dc]
`dfe`: Estimates for user verification filtered for 'District of Columbia'.
st state characteristic pums_est_09_to_13 pums_se_09_to_13 pums_moe_09_to_13
288 11 District of Columbia Total population 619,371 0 0
289 11 District of Columbia Housing unit population (RELP=0-15) 579,281 0 0
290 11 District of Columbia GQ population (RELP=16-17) 40,090 0 0
291 11 District of Columbia GQ institutional population (RELP=16) 7,443 80 132
292 11 District of Columbia GQ noninstitutional population (RELP=17) 32,647 80 132
293 11 District of Columbia Total males (SEX=1) 292,566 361 595
294 11 District of Columbia Total females (SEX=2) 326,805 361 595
295 11 District of Columbia Age 0-4 36,530 253 417
296 11 District of Columbia Age 5-9 27,658 636 1046
297 11 District of Columbia Age 10-14 24,621 598 984
298 11 District of Columbia Age 15-19 40,950 825 1357
299 11 District of Columbia Age 20-24 58,828 779 1281
300 11 District of Columbia Age 25-34 134,025 526 865
301 11 District of Columbia Age 35-44 84,310 534 878
302 11 District of Columbia Age 45-54 75,981 435 716
303 11 District of Columbia Age 55-59 35,191 599 985
304 11 District of Columbia Age 60-64 31,070 590 970
305 11 District of Columbia Age 65-74 38,245 295 485
306 11 District of Columbia Age 75-84 22,283 420 690
307 11 District of Columbia Age 85 and over 9,679 377 619
308 11 District of Columbia Total housing units (TYPE=1) 298,327 113 185
309 11 District of Columbia Total occupied units 263,650 965 1588
310 11 District of Columbia Owner occupied units (TEN in 1,2) 110,362 1363 2242
311 11 District of Columbia Renter occupied units (TEN in 3,4) 153,288 1486 2444
312 11 District of Columbia Owned with a mortgage (TEN=1) 85,483 1208 1988
313 11 District of Columbia Owned free and clear (TEN=2) 24,879 565 929
314 11 District of Columbia Rented for cash (TEN=3) 149,500 1511 2485
315 11 District of Columbia No cash rent (TEN=4) 3,788 262 431
316 11 District of Columbia Total vacant units 34,677 920 1514
317 11 District of Columbia For rent (VACS=1) 10,686 618 1017
318 11 District of Columbia For sale only (VACS=3) 2,953 325 534
319 11 District of Columbia All Other Vacant (VACS in 2,4,5,6,7) 21,038 849 1397
In [12]:
print("`dfe`: Verify characteristic estimates, direct standard errors, and margin of error.")
# Verify the estimates following
#     technical-documentation/pums/documentation.2013.html
#     tech_docs/pums/accuracy/2009_2013AccuracyPUMS.pdf
# `tfmask_test_strs` maps record type -> characteristic -> source text of a
# boolean row mask. The characteristic names are matched against
# `dfe_dc['characteristic']`; the mask expressions are evaluated in the loop below.
# NOTE: The expressions name the data frames `dfp` and `dfh` directly,
# so both must exist in the notebook namespace when this cell runs.
tfmask_test_strs = collections.OrderedDict([
    ('PERSON RECORD', collections.OrderedDict([
        ('Total population', "np.asarray([True]*len(dfp))"),
        ('Housing unit population (RELP=0-15)',"np.logical_and(0 <= dfp['RELP'], dfp['RELP'] <= 15)"),
        ('GQ population (RELP=16-17)', "np.logical_and(16 <= dfp['RELP'], dfp['RELP'] <= 17)"),
        ('GQ institutional population (RELP=16)', "dfp['RELP'] == 16"),
        ('GQ noninstitutional population (RELP=17)', "dfp['RELP'] == 17"),
        ('Total males (SEX=1)', "dfp['SEX'] == 1"),
        ('Total females (SEX=2)', "dfp['SEX'] == 2"),
        ('Age 0-4', "np.logical_and(0 <= dfp['AGEP'], dfp['AGEP'] <= 4)"),
        ('Age 5-9', "np.logical_and(5 <= dfp['AGEP'], dfp['AGEP'] <= 9)"),
        ('Age 10-14', "np.logical_and(10 <= dfp['AGEP'], dfp['AGEP'] <= 14)"),
        ('Age 15-19', "np.logical_and(15 <= dfp['AGEP'], dfp['AGEP'] <= 19)"),
        ('Age 20-24', "np.logical_and(20 <= dfp['AGEP'], dfp['AGEP'] <= 24)"),
        ('Age 25-34', "np.logical_and(25 <= dfp['AGEP'], dfp['AGEP'] <= 34)"),
        ('Age 35-44', "np.logical_and(35 <= dfp['AGEP'], dfp['AGEP'] <= 44)"),
        ('Age 45-54', "np.logical_and(45 <= dfp['AGEP'], dfp['AGEP'] <= 54)"),
        ('Age 55-59', "np.logical_and(55 <= dfp['AGEP'], dfp['AGEP'] <= 59)"),
        ('Age 60-64', "np.logical_and(60 <= dfp['AGEP'], dfp['AGEP'] <= 64)"),
        ('Age 65-74', "np.logical_and(65 <= dfp['AGEP'], dfp['AGEP'] <= 74)"),
        ('Age 75-84', "np.logical_and(75 <= dfp['AGEP'], dfp['AGEP'] <= 84)"),
        ('Age 85 and over', "85 <= dfp['AGEP']")])),
    ('HOUSING RECORD', collections.OrderedDict([
        ('Total housing units (TYPE=1)', "dfh['TYPE'] == 1"),
        ('Total occupied units', "dfh['TEN'].notnull()"),
        ('Owner occupied units (TEN in 1,2)', "np.logical_or(dfh['TEN'] == 1, dfh['TEN'] == 2)"),
        ('Renter occupied units (TEN in 3,4)', "np.logical_or(dfh['TEN'] == 3, dfh['TEN'] == 4)"),
        ('Owned with a mortgage (TEN=1)', "dfh['TEN'] == 1"),
        ('Owned free and clear (TEN=2)', "dfh['TEN'] == 2"),
        ('Rented for cash (TEN=3)', "dfh['TEN'] == 3"),
        ('No cash rent (TEN=4)', "dfh['TEN'] == 4"),
        ('Total vacant units', "dfh['TEN'].isnull()"),
        ('For rent (VACS=1)', "dfh['VACS'] == 1"),
        ('For sale only (VACS=3)', "dfh['VACS'] == 3"),
        ('All Other Vacant (VACS in 2,4,5,6,7)',
         "functools.reduce(np.logical_or, (dfh['VACS'] == vacs for vacs in [2,4,5,6,7]))")]))])
for record_type in records_dfs:
    # NOTE(review): The masks are built from `dfp`/`dfh` but applied to `df`.
    # This assumes `records_dfs` holds those same frames; confirm where `records_dfs` is built.
    df = records_dfs[record_type]['dataframe']
    wt = records_dfs[record_type]['weight']
    wts = records_dfs[record_type]['replicate_weights']
    for char in tfmask_test_strs[record_type]:
        print("    '{char}'".format(char=char))
        # Select the reference verification data
        # and the records for the characteristic.
        tfmask_ref = dfe_dc['characteristic'] == char
        # Fail with a readable message instead of an IndexError from `.values[0]` below.
        assert tfmask_ref.any(), (
            "No reference row in `dfe_dc` for characteristic '{char}'".format(char=char))
        # `eval` is acceptable here only because every expression is a string literal
        # defined in this cell. Do not feed it text that comes from outside the notebook.
        tfmask_test = eval(tfmask_test_strs[record_type][char])
        # Calculate and verify the estimate ('est') for the characteristic.
        # The estimate is the sum of the record type's sample weight column `wt`
        # (the notebook defines 'PWGTP' for person and 'WGTP' for housing weights).
        col = 'pums_est_09_to_13'
        print("        '{col}':".format(col=col), end=' ')
        # The reference estimate is stored as text with thousands separators, e.g. '619,371'.
        ref_est = int(dfe_dc.loc[tfmask_ref, col].values[0].replace(',', ''))
        test_est = df.loc[tfmask_test, wt].sum()
        assert np.isclose(ref_est, test_est, rtol=0, atol=1), (
            "'{char}' {col}: ref={ref}, test={test}".format(
                char=char, col=col, ref=ref_est, test=test_est))
        print("(ref, test) = {tup}".format(tup=(ref_est, test_est)))
        # Calculate and verify the "direct standard error" ('se') of the estimate.
        # The direct standard error is a modified root-mean-square deviation
        # of the replicate estimates (column sums of the "replicate weights" `wts`)
        # from the full-sample estimate: sqrt((4/R)*sum((est_r - est)**2)).
        # R = len(wts); with the 80 replicate weights this is the factor 4/80.
        col = 'pums_se_09_to_13'
        print("        '{col}' :".format(col=col), end=' ')
        ref_se = dfe_dc.loc[tfmask_ref, col].values[0]
        test_se = ((4/len(wts))*((df.loc[tfmask_test, wts].sum() - test_est)**2).sum())**0.5
        assert np.isclose(ref_se, test_se, rtol=0, atol=1), (
            "'{char}' {col}: ref={ref}, test={test}".format(
                char=char, col=col, ref=ref_se, test=test_se))
        print("(ref, test) = {tup}".format(tup=(ref_se, test_se)))
        # Calculate and verify the margin of error ('moe') at the
        # 90% confidence level (+/- 1.645 standard errors).
        col = 'pums_moe_09_to_13'
        print("        '{col}':".format(col=col), end=' ')
        ref_moe = dfe_dc.loc[tfmask_ref, col].values[0]
        test_moe = 1.645*test_se
        assert np.isclose(ref_moe, test_moe, rtol=0, atol=1), (
            "'{char}' {col}: ref={ref}, test={test}".format(
                char=char, col=col, ref=ref_moe, test=test_moe))
        print("(ref, test) = {tup}".format(tup=(ref_moe, test_moe)))
`dfe`: Verify characteristic estimates, direct standard errors, and margin of error.

    'Total population'
        'pums_est_09_to_13': (ref, test) = (619371, 619371)
        'pums_se_09_to_13' : (ref, test) = (0, 0.0)
        'pums_moe_09_to_13': (ref, test) = (0, 0.0)
    'Housing unit population (RELP=0-15)'
        'pums_est_09_to_13': (ref, test) = (579281, 579281)
        'pums_se_09_to_13' : (ref, test) = (0, 0.0)
        'pums_moe_09_to_13': (ref, test) = (0, 0.0)
    'GQ population (RELP=16-17)'
        'pums_est_09_to_13': (ref, test) = (40090, 40090)
        'pums_se_09_to_13' : (ref, test) = (0, 0.0)
        'pums_moe_09_to_13': (ref, test) = (0, 0.0)
    'GQ institutional population (RELP=16)'
        'pums_est_09_to_13': (ref, test) = (7443, 7443)
        'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184)
        'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871)
    'GQ noninstitutional population (RELP=17)'
        'pums_est_09_to_13': (ref, test) = (32647, 32647)
        'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184)
        'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871)
    'Total males (SEX=1)'
        'pums_est_09_to_13': (ref, test) = (292566, 292566)
        'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036)
        'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794)
    'Total females (SEX=2)'
        'pums_est_09_to_13': (ref, test) = (326805, 326805)
        'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036)
        'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794)
    'Age 0-4'
        'pums_est_09_to_13': (ref, test) = (36530, 36530)
        'pums_se_09_to_13' : (ref, test) = (253, 253.37699185206222)
        'pums_moe_09_to_13': (ref, test) = (417, 416.80515159664236)
    'Age 5-9'
        'pums_est_09_to_13': (ref, test) = (27658, 27658)
        'pums_se_09_to_13' : (ref, test) = (636, 635.5916141674621)
        'pums_moe_09_to_13': (ref, test) = (1046, 1045.5482053054752)
    'Age 10-14'
        'pums_est_09_to_13': (ref, test) = (24621, 24621)
        'pums_se_09_to_13' : (ref, test) = (598, 598.0936799532328)
        'pums_moe_09_to_13': (ref, test) = (984, 983.864103523068)
    'Age 15-19'
        'pums_est_09_to_13': (ref, test) = (40950, 40950)
        'pums_se_09_to_13' : (ref, test) = (825, 825.0349386541154)
        'pums_moe_09_to_13': (ref, test) = (1357, 1357.18247408602)
    'Age 20-24'
        'pums_est_09_to_13': (ref, test) = (58828, 58828)
        'pums_se_09_to_13' : (ref, test) = (779, 778.715930233869)
        'pums_moe_09_to_13': (ref, test) = (1281, 1280.9877052347144)
    'Age 25-34'
        'pums_est_09_to_13': (ref, test) = (134025, 134025)
        'pums_se_09_to_13' : (ref, test) = (526, 525.9921102069878)
        'pums_moe_09_to_13': (ref, test) = (865, 865.257021290495)
    'Age 35-44'
        'pums_est_09_to_13': (ref, test) = (84310, 84310)
        'pums_se_09_to_13' : (ref, test) = (534, 533.5205244411877)
        'pums_moe_09_to_13': (ref, test) = (878, 877.6412627057538)
    'Age 45-54'
        'pums_est_09_to_13': (ref, test) = (75981, 75981)
        'pums_se_09_to_13' : (ref, test) = (435, 435.0808545546448)
        'pums_moe_09_to_13': (ref, test) = (716, 715.7080057423907)
    'Age 55-59'
        'pums_est_09_to_13': (ref, test) = (35191, 35191)
        'pums_se_09_to_13' : (ref, test) = (599, 598.5786915686191)
        'pums_moe_09_to_13': (ref, test) = (985, 984.6619476303784)
    'Age 60-64'
        'pums_est_09_to_13': (ref, test) = (31070, 31070)
        'pums_se_09_to_13' : (ref, test) = (590, 589.6810154651412)
        'pums_moe_09_to_13': (ref, test) = (970, 970.0252704401572)
    'Age 65-74'
        'pums_est_09_to_13': (ref, test) = (38245, 38245)
        'pums_se_09_to_13' : (ref, test) = (295, 295.0997289053313)
        'pums_moe_09_to_13': (ref, test) = (485, 485.43905404927)
    'Age 75-84'
        'pums_est_09_to_13': (ref, test) = (22283, 22283)
        'pums_se_09_to_13' : (ref, test) = (420, 419.69280432239964)
        'pums_moe_09_to_13': (ref, test) = (690, 690.3946631103474)
    'Age 85 and over'
        'pums_est_09_to_13': (ref, test) = (9679, 9679)
        'pums_se_09_to_13' : (ref, test) = (377, 376.5637396245156)
        'pums_moe_09_to_13': (ref, test) = (619, 619.4473516823282)
    'Total housing units (TYPE=1)'
        'pums_est_09_to_13': (ref, test) = (298327, 298327)
        'pums_se_09_to_13' : (ref, test) = (113, 112.68873058118989)
        'pums_moe_09_to_13': (ref, test) = (185, 185.37296180605736)
    'Total occupied units'
        'pums_est_09_to_13': (ref, test) = (263650, 263650)
        'pums_se_09_to_13' : (ref, test) = (965, 965.0778984102786)
        'pums_moe_09_to_13': (ref, test) = (1588, 1587.5531428849083)
    'Owner occupied units (TEN in 1,2)'
        'pums_est_09_to_13': (ref, test) = (110362, 110362)
        'pums_se_09_to_13' : (ref, test) = (1363, 1363.1910174293257)
        'pums_moe_09_to_13': (ref, test) = (2242, 2242.449223671241)
    'Renter occupied units (TEN in 3,4)'
        'pums_est_09_to_13': (ref, test) = (153288, 153288)
        'pums_se_09_to_13' : (ref, test) = (1486, 1485.6482760061347)
        'pums_moe_09_to_13': (ref, test) = (2444, 2443.8914140300917)
    'Owned with a mortgage (TEN=1)'
        'pums_est_09_to_13': (ref, test) = (85483, 85483)
        'pums_se_09_to_13' : (ref, test) = (1208, 1208.399126944405)
        'pums_moe_09_to_13': (ref, test) = (1988, 1987.8165638235462)
    'Owned free and clear (TEN=2)'
        'pums_est_09_to_13': (ref, test) = (24879, 24879)
        'pums_se_09_to_13' : (ref, test) = (565, 565.0110618386157)
        'pums_moe_09_to_13': (ref, test) = (929, 929.4431967245227)
    'Rented for cash (TEN=3)'
        'pums_est_09_to_13': (ref, test) = (149500, 149500)
        'pums_se_09_to_13' : (ref, test) = (1511, 1510.8262970970554)
        'pums_moe_09_to_13': (ref, test) = (2485, 2485.309258724656)
    'No cash rent (TEN=4)'
        'pums_est_09_to_13': (ref, test) = (3788, 3788)
        'pums_se_09_to_13' : (ref, test) = (262, 262.1715087495207)
        'pums_moe_09_to_13': (ref, test) = (431, 431.2721318929615)
    'Total vacant units'
        'pums_est_09_to_13': (ref, test) = (34677, 34677)
        'pums_se_09_to_13' : (ref, test) = (920, 920.3688391074527)
        'pums_moe_09_to_13': (ref, test) = (1514, 1514.0067403317596)
    'For rent (VACS=1)'
        'pums_est_09_to_13': (ref, test) = (10686, 10686)
        'pums_se_09_to_13' : (ref, test) = (618, 618.3948172486571)
        'pums_moe_09_to_13': (ref, test) = (1017, 1017.2594743740409)
    'For sale only (VACS=3)'
        'pums_est_09_to_13': (ref, test) = (2953, 2953)
        'pums_se_09_to_13' : (ref, test) = (325, 324.77245880770124)
        'pums_moe_09_to_13': (ref, test) = (534, 534.2506947386686)
    'All Other Vacant (VACS in 2,4,5,6,7)'
        'pums_est_09_to_13': (ref, test) = (21038, 21038)
        'pums_se_09_to_13' : (ref, test) = (849, 849.4756618055635)
        'pums_moe_09_to_13': (ref, test) = (1397, 1397.3874636701519)

Export ipynb to html

In [13]:
# Export ipynb to html
# Render this notebook once per nbconvert template. Each output file is written
# next to the notebook as '<notebook path without extension>-<template>.html'.
for template in ['basic', 'full']:
    path_html = os.path.splitext(path_ipynb)[0]+'-'+template+'.html'
    cmd = ['jupyter', 'nbconvert', '--to', 'html', '--template', template, path_ipynb, '--output', path_html]
    # Echo the command so that the recorded output shows what was run.
    print(' '.join(cmd))
    # `check=True` raises subprocess.CalledProcessError if nbconvert exits non-zero,
    # so a failed export stops the cell instead of passing silently.
    subprocess.run(args=cmd, check=True)
jupyter nbconvert --to html --template basic /home/samuel_harrold/ --output /home/samuel_harrold/

jupyter nbconvert --to html --template full /home/samuel_harrold/ --output /home/samuel_harrold/