Related post:
https://stharrold.github.io/20160110-etl-census-with-python.html
Data documentation:
https://www.census.gov/programs-surveys/acs/technical-documentation/pums/documentation.2013.html
cd ~
/home/samuel_harrold
# Import standard packages.
import collections
import functools
import os
import pdb # Debug with pdb.
import subprocess
import sys
import time
# Import installed packages.
import numpy as np
import pandas as pd
# Import local packages.
# `dsdemos` version: https://github.com/stharrold/dsdemos/releases/tag/v0.0.3
# Put the `dsdemos` subdirectory of the current directory at the front of the
# module search path so that it is searched before any other location.
# NOTE(review): presumably this is the repository checkout that contains the
# `dsdemos` package -- confirm against the working directory layout.
sys.path.insert(0, os.path.join(os.path.curdir, r'dsdemos'))
# Autoreload local packages after editing (IPython `autoreload` extension).
%reload_ext autoreload
%autoreload 2
import dsdemos as dsd
# Report when the notebook was run (UTC) and the interpreter/library versions.
print("Timestamp:")
print(time.strftime(r'%Y-%m-%dT%H:%M:%S%Z', time.gmtime()))
print()
print("Versions:")
for (label, version) in [
        ("Python:", sys.version_info),
        ("numpy:", np.__version__),
        ("pandas:", pd.__version__)]:
    print(label, version)
Timestamp: 2016-02-08T04:30:52GMT Versions: Python: sys.version_info(major=3, minor=5, micro=1, releaselevel='final', serial=0) numpy: 1.10.2 pandas: 0.17.1
File sources:
# File paths
# Notebook location inside the blog's static content directory.
basename = r'20160110-etl-census-with-python'
filename = basename
path_static = os.path.join(os.path.expanduser(r'~'), r'stharrold.github.io/content/static')
path_ipynb = os.path.join(path_static, basename, '{name}.ipynb'.format(name=filename))
# Local mirror of the Census ACS files; every data path below is built from `path_acs`.
path_disk = os.path.abspath(r'/mnt/disk-20151227t211000z/')
path_acs = os.path.join(path_disk, r'www2-census-gov/programs-surveys/acs/')
_acs_path = functools.partial(os.path.join, path_acs)
path_pcsv = _acs_path(r'data/pums/2013/5-Year/ss13pdc.csv') # 'pdc' = 'person DC'
path_hcsv = _acs_path(r'data/pums/2013/5-Year/ss13hdc.csv') # 'hdc' = 'housing DC'
path_ecsv = _acs_path(r'tech_docs/pums/estimates/pums_estimates_9_13.csv')
path_dtxt = _acs_path(r'tech_docs/pums/data_dict/PUMS_Data_Dictionary_2009-2013.txt')
# Weights
# Base weight column names, followed by the 80 numbered replicate-weight
# column names (base name + 1..80) for each record type.
pwt = 'PWGTP' # person weight
hwt = 'WGTP' # housing weight
pwts = ['{wt}{num}'.format(wt=pwt, num=num) for num in range(1, 81)]
hwts = ['{wt}{num}'.format(wt=hwt, num=num) for num in range(1, 81)]
print("`ddict`: Load the data dictionary and display the hierarchical structure.")
# Only `ddict` is used below.
# The hierarchical data frame is only for display.
ddict = dsd.census.parse_pumsdatadict(path=path_dtxt)
# One data frame per record type, with one row per variable name.
tmp = {
    record_type: pd.DataFrame.from_dict(
        ddict['record_types'][record_type], orient='index')
    for record_type in ddict['record_types']}
# Stack the per-record-type frames under a two-level (record_type, var_name) index.
pd.concat(tmp, names=['record_type', 'var_name']).head()
`ddict`: Load the data dictionary and display the hierarchical structure.
length | description | var_codes | notes | ||
---|---|---|---|---|---|
record_type | var_name | ||||
HOUSING RECORD | ACR | 1 | Lot size | {'b': 'N/A (GQ/not a one-family house or mobil... | NaN |
ADJHSG | 7 | Adjustment factor for housing dollar amounts (... | {'1086032': '2009 factor', '1068395': '2010 fa... | [Note: The values of ADJHSG inflation-adjusts ... | |
ADJINC | 7 | Adjustment factor for income and earnings doll... | {'1085467': '2009 factor (0.999480 * 1.0860317... | [Note: The values of ADJINC inflation-adjusts ... | |
AGS | 1 | Sales of Agriculture Products (Yearly sales) | {'b': 'N/A (GQ/vacant/not a one-family house o... | [Note: No adjustment factor is applied to AGS.] | |
BATH | 1 | Bathtub or shower | {'b': 'N/A (GQ)', '1': 'Yes', '2': 'No'} | NaN |
# Preview the first 10 entries stored under the 'notes' key of `ddict`.
print("`ddict`: First 10 unstructured notes from end of file.")
ddict['notes'][:10]
`ddict`: First 10 unstructured notes from end of file.
['* In cases where the SOC occupation code ends in X(s) or Y(s), two or more SOC', 'occupation codes were aggregated to correspond to a specific Census occupation', 'code. In these cases, the Census occupation description is used for the SOC', 'occupation title."', '** These codes are pseudo codes developed by the Census Bureau and are not', ' official or equivalent NAICS or SOC codes.', 'Legend to Identify NAICS Equivalents', ' M = Multiple NAICS codes', ' P = Part of a NAICS code - NAICS code split between two or more Census', ' codes']
print("`dfp`, `dfh`: Load person and housing records.")
time_start = time.perf_counter()
# Summarize each CSV on disk (size, line count, column count) before loading.
for path in [path_pcsv, path_hcsv]:
    # Scan the file once: read the header line, then tally the remaining
    # lines without holding them in memory. The line count includes the header.
    with open(path) as fobj:
        first_line = fobj.readline()
        nlines = (1 if first_line else 0) + sum(1 for _ in fobj)
    # Column count is the number of comma-separated fields in the header line
    # (assumes the header contains no quoted commas).
    ncols = first_line.count(',')+1
    print("{path}:".format(path=path))
    print(" size (MB) = {size:.1f}".format(size=os.path.getsize(path)/1e6))
    print(" num lines = {nlines}".format(nlines=nlines))
    print(" num columns = {ncols}".format(ncols=ncols))
    print()
# For ss13pdc.csv, low_memory=False since otherwise pandas raises DtypeWarning.
dfp = pd.read_csv(path_pcsv, low_memory=False)
dfh = pd.read_csv(path_hcsv, low_memory=True)
# Report the in-memory footprint of each loaded data frame.
for (name, df) in [('dfp', dfp), ('dfh', dfh)]:
    print("{name} RAM usage (MB) = {mem:.1f}".format(
        name=name, mem=df.memory_usage().sum()/1e6))
time_stop = time.perf_counter()
print()
print("Time elapsed (sec) = {diff:.1f}".format(diff=time_stop-time_start))
`dfp`, `dfh`: Load person and housing records. /mnt/disk-20151227t211000z/www2-census-gov/programs-surveys/acs/data/pums/2013/5-Year/ss13pdc.csv: size (MB) = 30.5 num lines = 30560 num columns = 295 /mnt/disk-20151227t211000z/www2-census-gov/programs-surveys/acs/data/pums/2013/5-Year/ss13hdc.csv: size (MB) = 13.5 num lines = 17501 num columns = 205 dfp RAM usage (MB) = 72.1 dfh RAM usage (MB) = 28.7 Time elapsed (sec) = 2.0
# Preview the person records; `head()` with no argument shows the first 5 rows.
print("`dfp`: First 5 person records.")
dfp.head()
`dfp`: First 5 person records.
RT | SERIALNO | SPORDER | PUMA00 | PUMA10 | ST | ADJINC | PWGTP | AGEP | CIT | ... | PWGTP71 | PWGTP72 | PWGTP73 | PWGTP74 | PWGTP75 | PWGTP76 | PWGTP77 | PWGTP78 | PWGTP79 | PWGTP80 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | P | 2009000000403 | 1 | 102 | -9 | 11 | 1085467 | 20 | 38 | 1 | ... | 6 | 26 | 31 | 32 | 26 | 6 | 36 | 6 | 19 | 20 |
1 | P | 2009000001113 | 1 | 103 | -9 | 11 | 1085467 | 13 | 78 | 1 | ... | 13 | 30 | 12 | 13 | 4 | 4 | 18 | 24 | 4 | 21 |
2 | P | 2009000001113 | 2 | 103 | -9 | 11 | 1085467 | 25 | 39 | 1 | ... | 26 | 50 | 23 | 20 | 8 | 7 | 38 | 41 | 7 | 37 |
3 | P | 2009000001113 | 3 | 103 | -9 | 11 | 1085467 | 17 | 8 | 1 | ... | 15 | 32 | 17 | 15 | 6 | 4 | 26 | 32 | 5 | 30 |
4 | P | 2009000001978 | 1 | 103 | -9 | 11 | 1085467 | 37 | 53 | 1 | ... | 65 | 12 | 13 | 37 | 36 | 41 | 57 | 36 | 11 | 33 |
5 rows × 295 columns
# Preview the housing records; `head()` with no argument shows the first 5 rows.
# The label names `dfh` because that is the data frame displayed here.
print("`dfh`: First 5 housing records.")
dfh.head()
`dfp`: First 5 housing records.
insp | RT | SERIALNO | DIVISION | PUMA00 | PUMA10 | REGION | ST | ADJHSG | ADJINC | ... | WGTP71 | WGTP72 | WGTP73 | WGTP74 | WGTP75 | WGTP76 | WGTP77 | WGTP78 | WGTP79 | WGTP80 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 600 | H | 2009000000403 | 5 | 102 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 6 | 25 | 30 | 32 | 26 | 6 | 36 | 6 | 18 | 19 |
1 | NaN | H | 2009000001113 | 5 | 103 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 14 | 29 | 12 | 12 | 4 | 4 | 18 | 23 | 4 | 22 |
2 | 480 | H | 2009000001978 | 5 | 103 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 65 | 12 | 14 | 37 | 36 | 41 | 57 | 36 | 11 | 34 |
3 | NaN | H | 2009000002250 | 5 | 105 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 4 | 4 | 4 | 4 | 23 | 14 | 11 | 4 | 20 | 21 |
4 | 2500 | H | 2009000002985 | 5 | 101 | -9 | 3 | 11 | 1086032 | 1085467 | ... | 66 | 45 | 10 | 35 | 34 | 10 | 34 | 55 | 50 | 10 |
5 rows × 205 columns
def _is_flag_or_replicate_weight(col, desc):
    """Return True if `col` should be skipped as a flag or replicate weight.

    A column is skipped when its name starts with 'F' and its description
    ends with ' flag' or ' edit', or when its name contains 'WGTP' and its
    description contains "Weight replicate".
    """
    is_flag = col.startswith('F') and desc.endswith((' flag', ' edit'))
    is_replicate_weight = 'WGTP' in col and "Weight replicate" in desc
    return is_flag or is_replicate_weight


def _print_var_codes(var_codes, max_codes=3):
    """Print the first `max_codes` code/meaning pairs from `var_codes`.

    The ' ...' marker is printed only when codes remain beyond the ones
    shown, so a column with exactly `max_codes` codes is not reported as
    truncated.
    """
    for (inum, var_code) in enumerate(var_codes):
        if inum >= max_codes:
            print(" ...")
            break
        print(" {vc}: {vcd}".format(vc=var_code, vcd=var_codes[var_code]))


print(
r"""`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags.
Printed format:
[PERSON, HOUSING] RECORD
COL: Column name.
Column description.
Multi-line optional column notes.
1-3 line description of value meanings ('variable codes').
Multi-line statistical description and data type.
...
num columns described = ncols""")
print()
# Pair each record type named in the data dictionary with its data frame
# and weight column names.
records_dfs = collections.OrderedDict([
    ('PERSON RECORD', {'dataframe': dfp, 'weight': pwt, 'replicate_weights': pwts}),
    ('HOUSING RECORD', {'dataframe': dfh, 'weight': hwt, 'replicate_weights': hwts})])
for record_type in records_dfs:
    print(record_type)
    df = records_dfs[record_type]['dataframe']
    ncols_desc = 0 # number of columns described
    for col in df.columns:
        # Look up the column in the data dictionary; columns that are absent
        # are still described statistically, with a placeholder description.
        if col in ddict['record_types'][record_type]:
            col_dict = ddict['record_types'][record_type][col]
            desc = col_dict['description']
        else:
            col_dict = None
            desc = 'Column not in data dictionary.'
        if _is_flag_or_replicate_weight(col, desc):
            continue
        print("{col}: {desc}".format(col=col, desc=desc))
        ncols_desc += 1
        if col_dict is not None:
            if 'notes' in col_dict:
                print(" {notes}".format(notes=col_dict['notes']))
            _print_var_codes(col_dict['var_codes'])
        # Prefix the continuation lines of the multi-line describe() output
        # to match the prefix used for the lines printed above.
        print(' '+repr(df[col].describe()).replace('\n', '\n '))
    print("num columns described = {ncd}".format(ncd=ncols_desc))
    print()
`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags. Printed format: [PERSON, HOUSING] RECORD COL: Column name. Column description. Multi-line optional column notes. 1-3 line description of value meanings ('variable codes'). Multi-line statistical description and data type. ... num columns described = ncols PERSON RECORD RT: Record Type P: Person Record count 30559 unique 1 top P freq 30559 Name: RT, dtype: object SERIALNO: Housing unit/GQ person serial number 200900000001..201399999999: Unique identifier count 3.055900e+04 mean 2.011081e+12 std 1.407751e+09 min 2.009000e+12 25% 2.010000e+12 50% 2.011001e+12 75% 2.012001e+12 max 2.013001e+12 Name: SERIALNO, dtype: float64 SPORDER: Person number 01..20: Person number count 30559.000000 mean 1.850584 std 1.235291 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 13.000000 Name: SPORDER, dtype: float64 PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10. ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.'] 00100..08200: Public use microdata area codes 7777: combination of 01801, 01802, and 01905 in Louisiana -0009: Code classification is Not Applicable for data collected in 2012 or later ... count 30559.000000 mean 55.840243 std 55.336541 min -9.000000 25% -9.000000 50% 101.000000 75% 103.000000 max 105.000000 Name: PUMA00, dtype: float64 PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data Collected in 2012 or later. Use in combination with PUMA00. 
['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.'] 00100..70301: Public use microdata area codes -0009: Code classification is Not Applicable for data collected prior to 2012 count 30559.000000 mean 38.259923 std 55.395391 min -9.000000 25% -9.000000 50% -9.000000 75% 103.000000 max 105.000000 Name: PUMA10, dtype: float64 ST: State Code 01: Alabama/AL 02: Alaska/AK 04: Arizona/AZ ... count 30559 mean 11 std 0 min 11 25% 11 50% 11 75% 11 max 11 Name: ST, dtype: float64 ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places) ['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars. ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.'] 1085467: 2009 factor (0.999480 * 1.08603175) 1076540: 2010 factor (1.007624 * 1.06839475) 1054614: 2011 factor (1.018237 * 1.03572510) ... 
count 30559.000000 mean 1048186.138192 std 29716.696630 min 1007549.000000 25% 1024887.000000 50% 1054614.000000 75% 1076540.000000 max 1085467.000000 Name: ADJINC, dtype: float64 PWGTP: Person's weight 00001..09999: Integer weight of person count 30559.000000 mean 20.268039 std 13.310075 min 1.000000 25% 12.000000 50% 16.000000 75% 24.000000 max 173.000000 Name: PWGTP, dtype: float64 AGEP: Age 00: Under 1 year 01..99: 1 to 99 years (Top-coded***) count 30559.000000 mean 38.728198 std 21.780122 min 0.000000 25% 23.000000 50% 35.000000 75% 55.000000 max 95.000000 Name: AGEP, dtype: float64 CIT: Citizenship status 1: Born in the U.S. 2: Born in Puerto Rico, Guam, the U.S. Virgin Islands, or the Northern Marianas 3: Born abroad of American parent(s) ... count 30559.000000 mean 1.471252 std 1.201267 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 5.000000 Name: CIT, dtype: float64 CITWP05: Year of naturalization write-in for data collected prior to 2012 bbbb: Not eligible - not naturalized 1925: 1925 or earlier (Bottom-coded) 1926: 1926 - 1930 ... count 1595.000000 mean 1110.813166 std 994.495059 min -9.000000 25% -9.000000 50% 1973.000000 75% 1999.000000 max 2011.000000 Name: CITWP05, dtype: float64 CITWP12: Year of naturalization write-in for data collected in 2012 or later bbbb: Not eligible - not naturalized 1928: 1928 or earlier (Bottom-coded) 1929: 1929 - 1933 ... count 1595.000000 mean 875.462696 std 996.639591 min -9.000000 25% -9.000000 50% -9.000000 75% 1999.000000 max 2013.000000 Name: CITWP12, dtype: float64 COW: Class of worker b: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 1: Employee of a private for-profit company or business, or of an individual, for wages, salary, or commissions 2: Employee of a private not-for-profit, tax-exempt, or charitable organization ... 
count 20557.000000 mean 2.592937 std 1.971727 min 1.000000 25% 1.000000 50% 2.000000 75% 5.000000 max 9.000000 Name: COW, dtype: float64 DDRS: Self-care difficulty b: N/A (Less than 5 years old) 1: Yes 2: No ... count 29078.000000 mean 1.969668 std 0.171503 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DDRS, dtype: float64 DEAR: Hearing difficulty 1: Yes 2: No count 30559.000000 mean 1.978010 std 0.146654 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DEAR, dtype: float64 DEYE: Vision difficulty 1: Yes 2: No count 30559.000000 mean 1.975163 std 0.155631 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DEYE, dtype: float64 DOUT: Independent living difficulty b: N/A (Less than 15 years old) 1: Yes 2: No ... count 26658.000000 mean 1.943432 std 0.231020 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DOUT, dtype: float64 DPHY: Ambulatory difficulty b: N/A (Less than 5 years old) 1: Yes 2: No ... count 29078.000000 mean 1.922966 std 0.266650 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DPHY, dtype: float64 DRAT: Veteran service connected disability rating (percentage) b: N/A (No service-connected disability/never served in military) 1: 0 percent 2: 10 or 20 percent ... count 287.000000 mean 3.400697 std 1.587671 min 1.000000 25% 2.000000 50% 3.000000 75% 5.000000 max 6.000000 Name: DRAT, dtype: float64 DRATX: Veteran service connected disability rating (checkbox) b: N/A (Less than 17 years old/never served in military) 1: Yes 2: No ... count 2072.000000 mean 1.861486 std 0.345522 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DRATX, dtype: float64 DREM: Cognitive difficulty b: N/A (Less than 5 years old) 1: Yes 2: No ... 
count 29078.000000 mean 1.943015 std 0.231817 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DREM, dtype: float64 ENG: Ability to speak English b: N/A (less than 5 years old/speaks only English) 1: Very well 2: Well ... count 4231.000000 mean 1.429449 std 0.750207 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 4.000000 Name: ENG, dtype: float64 FER: Gave birth to child within the past 12 months ['NOTE: Problems in the collection of data on women who gave birth in the past year (FER) in 2012 led to suppressing this variable in 59 PUMAs within states Florida, Georgia, Kansas, Montana, North Carolina, Ohio and Texas. This only affects 2012 vintage data. See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using FER. http://www.census.gov/acs/www/data_documentation/pums_documentation/'] b: N/A (less than 15 years/greater than 50 years/ male) 1: Yes 2: No ... count 9036.000000 mean 1.955069 std 0.207165 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: FER, dtype: float64 GCL: Grandparents living with grandchildren b: N/A (less than 30 years/institutional GQ) 1: Yes 2: No ... count 18439.000000 mean 1.968599 std 0.174403 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: GCL, dtype: float64 GCM: Length of time responsible for grandchildren b: N/A (less than 30 years/grandparent not responsible for grandchild/institutional GQ) 1: Less than 6 months 2: 6 to 11 months ... count 228.000000 mean 4.026316 std 1.078100 min 1.000000 25% 3.000000 50% 4.000000 75% 5.000000 max 5.000000 Name: GCM, dtype: float64 GCR: Grandparents responsible for grandchildren b: N/A (less than 30 years/institutional GQ/grandparent not living with grandchild) 1: Yes 2: No ... 
count 579.000000 mean 1.606218 std 0.489010 min 1.000000 25% 1.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: GCR, dtype: float64 HINS1: Insurance through a current or former employer or union 1: Yes 2: No count 30559.000000 mean 1.387120 std 0.487099 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 2.000000 Name: HINS1, dtype: float64 HINS2: Insurance purchased directly from an insurance company 1: Yes 2: No count 30559.000000 mean 1.852548 std 0.354562 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: HINS2, dtype: float64 HINS3: Medicare, for people 65 and older, or people with certain disabilities 1: Yes 2: No count 30559.000000 mean 1.847999 std 0.359028 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: HINS3, dtype: float64 HINS4: Medicaid, Medical Assistance, or any kind of government-assistance plan for those with low incomes or a disability 1: Yes 2: No count 30559.000000 mean 1.763016 std 0.425239 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: HINS4, dtype: float64 HINS5: TRICARE or other military health care 1: Yes 2: No count 30559.000000 mean 1.973461 std 0.160734 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: HINS5, dtype: float64 HINS6: VA (including those who have ever used or enrolled for VA health care) 1: Yes 2: No count 30559.000000 mean 1.981675 std 0.134127 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: HINS6, dtype: float64 HINS7: Indian Health Service 1: Yes 2: No count 30559.000000 mean 1.999215 std 0.028014 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: HINS7, dtype: float64 INTP: Interest, dividends, and net rental income past 12 months (signed) ['Note: Use values from ADJINC to adjust INTP to constant dollars.'] bbbbbb: N/A (less than 15 years old) 000000: None -09999..-00001: Loss $1 to $9999 (Rounded and bottom-coded) ... 
count 26658.000000 mean 2798.324368 std 18916.559752 min -7700.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 325000.000000 Name: INTP, dtype: float64 JWMNP: Travel time to work bbb: N/A (not a worker or worker who worked at home) 001..200: 1 to 200 minutes to get to work (Top-coded) count 14545.000000 mean 29.764043 std 19.584350 min 1.000000 25% 15.000000 50% 30.000000 75% 40.000000 max 142.000000 Name: JWMNP, dtype: float64 JWRIP: Vehicle occupancy bb: N/A (not a worker or worker whose means of transportation to work was not car, truck, or van) 01: Drove alone 02: In 2-person carpool ... count 6211.000000 mean 1.224602 std 0.677173 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 10.000000 Name: JWRIP, dtype: float64 JWTR: Means of transportation to work bb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job but not at work; Armed Forces, with a job but not at work) 01: Car, truck, or van 02: Bus or trolley bus ... count 15327.000000 mean 3.863900 std 3.554906 min 1.000000 25% 1.000000 50% 2.000000 75% 4.000000 max 12.000000 Name: JWTR, dtype: float64 LANX: Language other than English spoken at home b: N/A (less than 5 years old) 1: Yes, speaks another language 2: No, speaks only English ... count 29078.000000 mean 1.854495 std 0.352616 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: LANX, dtype: float64 MAR: Marital status 1: Married 2: Widowed 3: Divorced ... count 30559.000000 mean 3.659118 std 1.737333 min 1.000000 25% 1.000000 50% 5.000000 75% 5.000000 max 5.000000 Name: MAR, dtype: float64 MARHD: Divorced in the past 12 months b: N/A (age less than 15 years; never married) 1: Yes 2: No ... count 12371.000000 mean 1.982297 std 0.131874 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: MARHD, dtype: float64 MARHM: Married in the past 12 months b: N/A (age less than 15 years; never married) 1: Yes 2: No ... 
count 12371.000000 mean 1.954086 std 0.209307 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: MARHM, dtype: float64 MARHT: Number of times married b: N/A (age less than 15 years; never married) 1: One time 2: Two times ... count 12371.000000 mean 1.207259 std 0.461325 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 3.000000 Name: MARHT, dtype: float64 MARHW: Widowed in the past 12 months b: N/A (age less than 15 years; never married) 1: Yes 2: No ... count 12371.000000 mean 1.990866 std 0.095140 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: MARHW, dtype: float64 MARHYP05: Year last married for data collected prior to 2012 bbbb: N/A (age less than 15 years; never married) 1928: 1928 or earlier (Bottom-coded) 1929: 1929 ... count 12371.000000 mean 1158.447337 std 982.950909 min -9.000000 25% -9.000000 50% 1963.000000 75% 1994.000000 max 2011.000000 Name: MARHYP05, dtype: float64 MARHYP12: Year last married for data collected in 2012 or later bbbb: N/A (age less than 15 years; never married) 1932: 1932 or earlier (Bottom-coded) 1933: 1933 ... count 12371.000000 mean 819.956026 std 984.800070 min -9.000000 25% -9.000000 50% -9.000000 75% 1987.000000 max 2013.000000 Name: MARHYP12, dtype: float64 MIG: Mobility status (lived here 1 year ago) b: N/A (less than 1 year old) 1: Yes, same house (nonmovers) 2: No, outside US and Puerto Rico ... count 30229.00000 mean 1.37163 std 0.76873 min 1.00000 25% 1.00000 50% 1.00000 75% 1.00000 max 3.00000 Name: MIG, dtype: float64 MIL: Military service b: N/A (less than 17 years old) 1: Now on active duty 2: On active duty in the past, but not now ... count 26114.000000 mean 3.842843 std 0.549228 min 1.000000 25% 4.000000 50% 4.000000 75% 4.000000 max 4.000000 Name: MIL, dtype: float64 MLPA: Served September 2001 or later b: N/A (Less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... 
count 1855.000000 mean 0.224798 std 0.417562 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPA, dtype: float64 MLPB: Served August 1990 - August 2001 (including Persian Gulf War) b: N/A (Less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... count 1855.000000 mean 0.161186 std 0.367802 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPB, dtype: float64 MLPCD: Served May 1975 - July 1990 b: N/A (less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... count 1855.000000 mean 0.232345 std 0.422442 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPCD, dtype: float64 MLPE: Served Vietnam era (August 1964 - April 1975) b: N/A (Less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... count 1855.000000 mean 0.290027 std 0.453897 min 0.000000 25% 0.000000 50% 0.000000 75% 1.000000 max 1.000000 Name: MLPE, dtype: float64 MLPFG: Served February 1955 - July 1964 b: N/A (less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... count 1855.000000 mean 0.145013 std 0.352209 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPFG, dtype: float64 MLPH: Served Korean War (July 1950 - January 1955) b: N/A (Less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... count 1855.000000 mean 0.117520 std 0.322126 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPH, dtype: float64 MLPI: Served January 1947 - June 1950 b: N/A (Less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... 
count 1855.000000 mean 0.017790 std 0.132222 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPI, dtype: float64 MLPJ: Served World War II (December 1941 - December 1946) b: N/A (Less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... count 1855.000000 mean 0.086253 std 0.280814 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPJ, dtype: float64 MLPK: Served November 1941 or earlier b: N/A (Less than 17 years old/no active duty) 0: Did not serve this period 1: Served this period ... count 1855.000000 mean 0.003774 std 0.061330 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: MLPK, dtype: float64 NWAB: Temporary absence from work (UNEDITED-See "Employment Status Recode" (ESR)) b: N/A (less than 16 years old/at work/on layoff) 1: Yes 2: No ... count 26399.000000 mean 2.564074 std 0.529434 min 1.000000 25% 2.000000 50% 3.000000 75% 3.000000 max 3.000000 Name: NWAB, dtype: float64 NWAV: Available for work (UNEDITED-See "Employment Status Recode" (ESR)) b: N/A (less than 16 years/at work/not looking) 1: Yes 2: No, temporarily ill ... count 26399.000000 mean 4.530702 std 1.224743 min 1.000000 25% 5.000000 50% 5.000000 75% 5.000000 max 5.000000 Name: NWAV, dtype: float64 NWLA: On layoff from work (UNEDITED-See "Employment Status Recode" (ESR)) b: N/A (less than 16 years old/at work) 1: Yes 2: No ... count 26399.000000 mean 2.540134 std 0.524830 min 1.000000 25% 2.000000 50% 3.000000 75% 3.000000 max 3.000000 Name: NWLA, dtype: float64 NWLK: Looking for work (UNEDITED-See "Employment Status Recode" (ESR)) b: N/A (less than 16 years old/at work/temporarily absent/informed of recall) 1: Yes 2: No ... 
count 26399.000000 mean 2.503315 std 0.635689 min 1.000000 25% 2.000000 50% 3.000000 75% 3.000000 max 3.000000 Name: NWLK, dtype: float64 NWRE: Informed of recall (UNEDITED-See "Employment Status Recode" (ESR)) b: N/A (less than 16 years old/at work/not on layoff) 1: Yes 2: No ... count 26399.000000 mean 2.903974 std 0.315014 min 1.000000 25% 3.000000 50% 3.000000 75% 3.000000 max 3.000000 Name: NWRE, dtype: float64 OIP: All other income past 12 months ['Note: Use values from ADJINC to adjust OIP to constant dollars.'] bbbbbb: N/A (less than 15 years old) 000000: None 000001..999999: $1 to $999999 (Rounded and top-coded) ... count 26658.000000 mean 675.345037 std 4722.241622 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 83000.000000 Name: OIP, dtype: float64 PAP: Public assistance income past 12 months ['Note: Use values from ADJINC to adjust PAP to constant dollars.'] bbbbb: N/A (less than 15 years old) 00000: None 00001..99999: $1 to $99999 (Rounded) ... count 26658.000000 mean 76.790832 std 692.300350 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 25400.000000 Name: PAP, dtype: float64 RELP: Relationship 00: Reference person 01: Husband/wife 02: Biological son or daughter ... count 30559.000000 mean 3.656795 std 5.569053 min 0.000000 25% 0.000000 50% 1.000000 75% 5.000000 max 17.000000 Name: RELP, dtype: float64 RETP: Retirement income past 12 months ['Note: Use values from ADJINC to adjust RETP to constant dollars.'] bbbbbb: N/A (less than 15 years old) 000000: None 000001..999999: $1 to $999999 (Rounded and top-coded) ... count 26658.000000 mean 3493.095881 std 15552.960973 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 249000.000000 Name: RETP, dtype: float64 SCH: School enrollment b: N/A (less than 3 years old) 1: No, has not attended in the last 3 months 2: Yes, public school or public college ... 
count 29645.000000 mean 1.376050 std 0.691228 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 3.000000 Name: SCH, dtype: float64 SCHG: Grade level attending bb: N/A (not attending school) 01: Nursery school/preschool 02: Kindergarten ... count 7544.00000 mean 11.28526 std 5.07951 min 1.00000 25% 7.00000 50% 14.00000 75% 15.00000 max 16.00000 Name: SCHG, dtype: float64 SCHL: Educational attainment bb: N/A (less than 3 years old) 01: No schooling completed 02: Nursery school, preschool ... count 29645.000000 mean 17.468173 std 5.585758 min 1.000000 25% 16.000000 50% 19.000000 75% 21.000000 max 24.000000 Name: SCHL, dtype: float64 SEMP: Self-employment income past 12 months (signed) ['Note: Use values from ADJINC to adjust SEMP to constant dollars.'] bbbbbb: N/A (less than 15 years old) 000000: None -10000..-00001: Loss $1 to $10000 (Rounded and bottom-coded) ... count 26658.000000 mean 2956.785243 std 30447.719592 min -9100.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 727000.000000 Name: SEMP, dtype: float64 SEX: Sex 1: Male 2: Female count 30559.000000 mean 1.537878 std 0.498571 min 1.000000 25% 1.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: SEX, dtype: float64 SSIP: Supplementary Security Income past 12 months ['Note: Use values from ADJINC to adjust SSIP to constant dollars.'] bbbbb: N/A (less than 15 years old) 00000: None 00001..99999: $1 to $99999 (Rounded) ... count 26658.000000 mean 296.556381 std 1651.630937 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 30000.000000 Name: SSIP, dtype: float64 SSP: Social Security income past 12 months ['Note: Use values from ADJINC to adjust SSP to constant dollars.'] bbbbb: N/A (less than 15 years old) 00000: None 00001..99999: $1 to $99999 (Rounded) ... 
count 26658.000000 mean 1618.331458 std 4844.120790 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 50000.000000 Name: SSP, dtype: float64 WAGP: Wages or salary income past 12 months ['Note: Use values from ADJINC to adjust WAGP to constant dollars.'] bbbbbb: N/A (less than 15 years old) 000000: None 000001..999999: $1 to 999999 (Rounded and top-coded) ... count 26658.000000 mean 41347.736139 std 69993.911285 min 0.000000 25% 0.000000 50% 15000.000000 75% 60000.000000 max 660000.000000 Name: WAGP, dtype: float64 WKHP: Usual hours worked per week past 12 months bb: N/A (less than 16 years old/did not work during the past 12 months) 01..98: 1 to 98 usual hours 99: 99 or more usual hours ... count 17950.000000 mean 39.674485 std 13.046600 min 1.000000 25% 38.000000 50% 40.000000 75% 45.000000 max 99.000000 Name: WKHP, dtype: float64 WKL: When last worked b: N/A (less than 16 years old) 1: Within the past 12 months 2: 1-5 years ago ... count 26399.000000 mean 1.550930 std 0.842136 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 3.000000 Name: WKL, dtype: float64 WKW: Weeks worked during past 12 months b: N/A (less than 16 years old/did not work during the past 12 months) 1: 50 to 52 weeks 2: 48 to 49 weeks ... count 17950.000000 mean 1.952201 std 1.698859 min 1.000000 25% 1.000000 50% 1.000000 75% 3.000000 max 6.000000 Name: WKW, dtype: float64 WRK: Worked last week b: N/A (not reported) 1: Worked 2: Did not work ... count 24046.000000 mean 1.390086 std 0.487779 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 2.000000 Name: WRK, dtype: float64 YOEP05: Year of entry for data collected prior to 2012 bbbb: Not eligible - Born in the US 1919: 1919 or earlier (Bottom-coded) 1920: 1920 ... 
count 4268.000000 mean 1126.390112 std 990.940648 min -9.000000 25% -9.000000 50% 1970.000000 75% 1997.000000 max 2011.000000 Name: YOEP05, dtype: float64 YOEP12: Year of entry for data collected in 2012 or later bbbb: Not eligible - Born in the US 1921: 1921 or earlier (Bottom-coded) 1922: 1922 - 1923 ... count 4268.000000 mean 856.887769 std 992.461884 min -9.000000 25% -9.000000 50% -9.000000 75% 1995.000000 max 2013.000000 Name: YOEP12, dtype: float64 ANC: Ancestry recode 1: Single 2: Multiple 3: Unclassified ... count 30559.000000 mean 1.555810 std 0.943709 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 4.000000 Name: ANC, dtype: float64 ANC1P05: Recoded Detailed Ancestry for data collected prior to 2012 - first entry 001: Alsatian 003: Austrian 005: Basque ... count 30559.000000 mean 339.916359 std 420.806625 min -9.000000 25% -9.000000 50% 50.000000 75% 902.000000 max 999.000000 Name: ANC1P05, dtype: float64 ANC1P12: Recoded Detailed Ancestry for data collected in 2012 or later - first entry 001: Alsatian 003: Austrian 005: Basque ... count 30559.000000 mean 251.538041 std 397.645131 min -9.000000 25% -9.000000 50% -9.000000 75% 615.000000 max 999.000000 Name: ANC1P12, dtype: float64 ANC2P05: Recoded Detailed Ancestry for data collected prior to 2012 - second entry 001: Alsatian 003: Austrian 005: Basque ... count 30559.000000 mean 478.113944 std 494.015425 min -9.000000 25% -9.000000 50% 125.000000 75% 999.000000 max 999.000000 Name: ANC2P05, dtype: float64 ANC2P12: Recoded Detailed Ancestry for data collected in 2012 or later - second entry 001: Alsatian 003: Austrian 005: Basque ... count 30559.000000 mean 346.208515 std 473.692419 min -9.000000 25% -9.000000 50% -9.000000 75% 999.000000 max 999.000000 Name: ANC2P12, dtype: float64 DECADE: Decade of entry b: N/A (Born in the US) 1: Before 1950 2: 1950 - 1959 ... 
count 4268.000000 mean 5.698454 std 1.477502 min 1.000000 25% 5.000000 50% 6.000000 75% 7.000000 max 7.000000 Name: DECADE, dtype: float64 DIS: Disability recode 1: With a disability 2: Without a disability count 30559.000000 mean 1.871527 std 0.334621 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: DIS, dtype: float64 DRIVESP: Number of vehicles calculated from JWRI b: N/A (Nonworker or worker who does not drive to work) 1: 1.000 vehicles (Drove alone) 2: 0.500 vehicles (In a 2-person carpool) ... count 6211.000000 mean 1.217517 std 0.608995 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 6.000000 Name: DRIVESP, dtype: float64 ESP: Employment status of parents b: N/A (not own child of householder, and not child in subfamily) Living with two parents: 1: Both parents in labor force 2: Father only in labor force ... count 4318.000000 mean 4.348541 std 2.889376 min 1.000000 25% 1.000000 50% 5.000000 75% 7.000000 max 8.000000 Name: ESP, dtype: float64 ESR: Employment status recode b: N/A (less than 16 years old) 1: Civilian employed, at work 2: Civilian employed, with a job but not at work ... count 26399.000000 mean 2.880071 std 2.322380 min 1.000000 25% 1.000000 50% 1.000000 75% 6.000000 max 6.000000 Name: ESR, dtype: float64 FOD1P: Recoded field of degree - first entry bbbb: N/A (less than bachelor's degree) 1100: GENERAL AGRICULTURE 1101: AGRICULTURE PRODUCTION AND MANAGEMENT ... count 12871.000000 mean 4518.419470 std 1596.571638 min 1100.000000 25% 3301.000000 50% 5404.000000 75% 5507.000000 max 6403.000000 Name: FOD1P, dtype: float64 FOD2P: Recoded field of degree - second entry bbbb: N/A (less than bachelor's degree) 1100: GENERAL AGRICULTURE 1101: AGRICULTURE PRODUCTION AND MANAGEMENT ... 
count 2230.000000 mean 4427.388789 std 1610.279474 min 1101.000000 25% 2602.000000 50% 5200.000000 75% 5506.000000 max 6403.000000 Name: FOD2P, dtype: float64 HICOV: Health insurance coverage recode 1: With health insurance coverage 2: No health insurance coverage count 30559.00000 mean 1.05815 std 0.23403 min 1.00000 25% 1.00000 50% 1.00000 75% 1.00000 max 2.00000 Name: HICOV, dtype: float64 HISP: Recoded detailed Hispanic origin 01: Not Spanish/Hispanic/Latino 02: Mexican 03: Puerto Rican ... count 30559.000000 mean 1.666808 std 2.950180 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 24.000000 Name: HISP, dtype: float64 INDP: Industry recode based on 2012 IND codes ['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on industry groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.'] bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 0170: AGR-CROP PRODUCTION 0180: AGR-ANIMAL PRODUCTION AND AQUACULTURE ... count 20557.000000 mean 7685.842584 std 1845.245220 min 170.000000 25% 7270.000000 50% 7870.000000 75% 9160.000000 max 9920.000000 Name: INDP, dtype: float64 JWAP: Time of arrival at work - hour and minute bbb: N/A (not a worker; worker who worked at home) 001: 12:00 a.m. to 12:04 a.m. 002: 12:05 a.m. to 12:09 a.m. ... count 14545.000000 mean 106.071640 std 33.565316 min 1.000000 25% 91.000000 50% 100.000000 75% 109.000000 max 284.000000 Name: JWAP, dtype: float64 JWDP: Time of departure for work - hour and minute bbb: N/A (not a worker; worker who worked at home) 001: 12:00 a.m. to 12:29 a.m. 002: 12:30 a.m. to 12:59 a.m. ... 
count 14545.000000 mean 57.893022 std 22.927208 min 1.000000 25% 43.000000 50% 55.000000 75% 64.000000 max 150.000000 Name: JWDP, dtype: float64 LANP05: Language spoken at home for data collected prior to 2012 bbb: N/A (less than 5 years old/speaks only English) 601: Jamaican Creole 607: German ... count 4231.000000 mean 362.528717 std 336.415892 min -9.000000 25% -9.000000 50% 620.000000 75% 625.000000 max 994.000000 Name: LANP05, dtype: float64 LANP12: Language spoken at home for data collected in 2012 or later bbb: N/A (less than 5 years old/speaks only English) 601: Jamaican Creole 602: Krio ... count 4231.000000 mean 290.464429 std 337.743570 min -9.000000 25% -9.000000 50% -9.000000 75% 625.000000 max 994.000000 Name: LANP12, dtype: float64 MIGPUMA00: Migration PUMA based on Census 2000 definition for data collected prior to 2012 bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago) 00001: Did not live in the United States or in Puerto Rico one year ago 00002: Lived in Puerto Rico one year ago and current residence is in the U.S. ... count 5832.000000 mean 465.729767 std 1096.576145 min -9.000000 25% -9.000000 50% 100.000000 75% 100.000000 max 8100.000000 Name: MIGPUMA00, dtype: float64 MIGPUMA10: Migration PUMA based on 2010 Census definition for data collected in 2012 or later bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago) 00001: Did not live in the United States or in Puerto Rico one year ago 00002: Lived in Puerto Rico one year ago and current residence is in the U.S. ... count 5832.000000 mean 1528.504630 std 7819.009818 min -9.000000 25% -9.000000 50% -9.000000 75% 100.000000 max 59300.000000 Name: MIGPUMA10, dtype: float64 MIGSP05: Migration recode for data collected prior to 2012 - State or foreign country code bbb: N/A (person less than 1 year old/lived in same house 1 year ago) 001: Alabama/AL 002: Alaska/AK ... 
count 5832.000000 mean 16.364026 std 57.178932 min -9.000000 25% -9.000000 50% 11.000000 75% 12.000000 max 554.000000 Name: MIGSP05, dtype: float64 MIGSP12: Migration recode for data collected in 2012 or later - State or foreign country code bbb: N/A (person less than 1 year old/lived in same house 1 year ago) 001: Alabama/AL 002: Alaska/AK ... count 5832.000000 mean 11.033951 std 50.483716 min -9.000000 25% -9.000000 50% -9.000000 75% 11.000000 max 555.000000 Name: MIGSP12, dtype: float64 MSP: Married, spouse present/spouse absent b: N/A (age less than 15 years) 1: Now married, spouse present 2: Now married, spouse absent ... count 26658.000000 mean 4.202491 std 2.152624 min 1.000000 25% 1.000000 50% 6.000000 75% 6.000000 max 6.000000 Name: MSP, dtype: float64 NAICSP: NAICS Industry code based on 2012 NAICS codes ['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on NAICS groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.'] bbbbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 111 : AGR-CROP PRODUCTION 112 : AGR-ANIMAL PRODUCTION AND AQUACULTURE ... count 20557 unique 223 top 722Z freq 993 Name: NAICSP, dtype: object NATIVITY: Nativity 1: Native 2: Foreign born count 30559.000000 mean 1.123237 std 0.328714 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: NATIVITY, dtype: float64 NOP: Nativity of parent b: N/A (greater than 17 years old/not an own child of householder, and not child in subfamily) 1: Living with two parents: Both parents NATIVE 2: Living with two parents: Father only FOREIGN BORN ... 
count 4312.000000 mean 4.474954 std 2.696136 min 1.000000 25% 1.000000 50% 5.000000 75% 7.000000 max 8.000000 Name: NOP, dtype: float64 OC: Own child 0: No (includes GQ) 1: Yes count 30559.000000 mean 0.124775 std 0.330469 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: OC, dtype: float64 OCCP02: Occupation recode for data collected in 2009 based on 2002 OCC codes bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS 0020: MGR-GENERAL AND OPERATIONS MANAGERS ... count 20557 unique 314 top N.A. freq 16866 Name: OCCP02, dtype: object OCCP10: Occupation recode for data collected in 2010 and 2011 based on 2010 OCC codes bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS 0020: MGR-GENERAL AND OPERATIONS MANAGERS ... count 20557 unique 370 top N.A. freq 12485 Name: OCCP10, dtype: object OCCP12: Occupation recode for data collected in 2012 or later based on 2010 OCC codes ['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.'] bbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS 0020: MGR-GENERAL AND OPERATIONS MANAGERS ... count 20557 unique 357 top N.A. freq 11763 Name: OCCP12, dtype: object PAOC: Presence and age of own children b: N/A (male/female under 16 years old/GQ) 1: Females with own children under 6 years only 2: Females with own children 6 to 17 years only ... 
count 13083.000000 mean 3.616372 std 0.875946 min 1.000000 25% 4.000000 50% 4.000000 75% 4.000000 max 4.000000 Name: PAOC, dtype: float64 PERNP: Total person's earnings ['Note: Use values from ADJINC to adjust PERNP to constant dollars.'] bbbbbbb: N/A (less than 15 years old) 0000000: No earnings -010000: Loss of $10000 or more (Rounded & bottom-coded components) ... count 26399.000000 mean 44739.192053 std 77239.316348 min -9100.000000 25% 0.000000 50% 20000.000000 75% 62000.000000 max 1360000.000000 Name: PERNP, dtype: float64 PINCP: Total person's income (signed) ['Note: Use values from ADJINC to adjust PINCP to constant dollars.'] bbbbbbb: N/A (less than 15 years old) 0000000: None -019999: Loss of $19999 or more (Rounded & bottom-coded components) ... count 26658.000000 mean 53262.965339 std 82561.741382 min -13600.000000 25% 7200.000000 50% 30000.000000 75% 70000.000000 max 1471000.000000 Name: PINCP, dtype: float64 POBP05: Place of birth (Recode) for data collected prior to 2012 001: Alabama/AL 002: Alaska/AK 004: Arizona/AZ ... count 30559.000000 mean 29.560948 std 80.011666 min -9.000000 25% -9.000000 50% 11.000000 75% 31.000000 max 554.000000 Name: POBP05, dtype: float64 POBP12: Place of birth (Recode) for data collected in 2012 or later 001: Alabama/AL 002: Alaska/AK 004: Arizona/AZ ... count 30559.000000 mean 19.925554 std 72.939454 min -9.000000 25% -9.000000 50% -9.000000 75% 12.000000 max 515.000000 Name: POBP12, dtype: float64 POVPIP: Income-to-poverty ratio recode bbb: N/A 000..500: Below 501 percent 501: 501 percent or more ... 
count 28378.000000 mean 331.494045 std 181.116198 min 0.000000 25% 158.000000 50% 398.000000 75% 501.000000 max 501.000000 Name: POVPIP, dtype: float64 POWPUMA00: Place of work PUMA based on Census 2000 definition for data collected prior to 2012 bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work) 00001: Did not work in the United States or in Puerto Rico 00100..08200: Assigned Place of work PUMA. Use with POWSP05. ... count 15327.000000 mean 144.907810 std 345.593937 min -9.000000 25% -9.000000 50% 100.000000 75% 100.000000 max 6890.000000 Name: POWPUMA00, dtype: float64 POWPUMA10: Place of work PUMA based on 2010 Census definition for data collected in 2012 or later bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work) 00001: Did not work in the United States or in Puerto Rico 00100..70100: Assigned Place of work PUMA. Use with POWSP12. ... count 15327.000000 mean 1563.770470 std 8888.575098 min -9.000000 25% -9.000000 50% -9.000000 75% 100.000000 max 59300.000000 Name: POWPUMA10, dtype: float64 POWSP05: Place of work for data collected prior to 2012 - State or foreign country recode bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work) 001: Alabama/AL 002: Alaska/AK ... 
count 15327.000000 mean 6.471978 std 18.985725 min -9.000000 25% -9.000000 50% 11.000000 75% 11.000000 max 555.000000 Name: POWSP05, dtype: float64 POWSP12: Place of work for data collected in 2012 or later - State or foreign country recode bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work) 001: Alabama/AL 002: Alaska/AK ... count 15327.000000 mean 2.357539 std 17.856144 min -9.000000 25% -9.000000 50% -9.000000 75% 11.000000 max 555.000000 Name: POWSP12, dtype: float64 PRIVCOV: Private health insurance coverage recode 1: With private health insurance coverage 2: Without private health insurance coverage count 30559.000000 mean 1.283877 std 0.450885 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 2.000000 Name: PRIVCOV, dtype: float64 PUBCOV: Public health coverage recode 1: With public health coverage 2: Without public health coverage count 30559.000000 mean 1.651461 std 0.476515 min 1.000000 25% 1.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: PUBCOV, dtype: float64 QTRBIR: Quarter of birth 1: January through March 2: April through June 3: July through September ... count 30559.000000 mean 2.520665 std 1.111741 min 1.000000 25% 2.000000 50% 3.000000 75% 4.000000 max 4.000000 Name: QTRBIR, dtype: float64 RAC1P: Recoded detailed race code 1: White alone 2: Black or African American alone 3: American Indian alone ... count 30559.000000 mean 2.025132 std 1.739391 min 1.000000 25% 1.000000 50% 2.000000 75% 2.000000 max 9.000000 Name: RAC1P, dtype: float64 RAC2P05: Recoded detailed race code for data collected prior to 2012 01: White alone 02: Black or African American alone 03: Apache alone ... 
count 30559.000000 mean -0.339834 std 13.838677 min -9.000000 25% -9.000000 50% 1.000000 75% 2.000000 max 67.000000 Name: RAC2P05, dtype: float64 RAC2P12: Recoded detailed race code for data collected in 2012 or later 01: White alone 02: Black or African American alone 03: Apache alone ... count 30559.000000 mean -2.417291 std 13.208816 min -9.000000 25% -9.000000 50% -9.000000 75% 1.000000 max 68.000000 Name: RAC2P12, dtype: float64 RAC3P05: Recoded detailed race code for data collected prior to 2012 01: Some other race alone 02: Other Pacific Islander alone 03: Samoan alone ... count 30559.000000 mean 27.004778 std 33.030433 min -9.000000 25% -9.000000 50% 44.000000 75% 68.000000 max 72.000000 Name: RAC3P05, dtype: float64 RAC3P12: Recoded detailed race code for data collected in 2012 or later 001: White alone 002: Black or African American alone 003: American Indian and Alaska Native alone ... count 30559.000000 mean -4.015347 std 7.034421 min -9.000000 25% -9.000000 50% -9.000000 75% 1.000000 max 95.000000 Name: RAC3P12, dtype: float64 RACAIAN: American Indian and Alaska Native recode (American Indian and Alaska Native alone or in combination with one or more other races) 0: No 1: Yes count 30559.000000 mean 0.009490 std 0.096954 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: RACAIAN, dtype: float64 RACASN: Asian recode (Asian alone or in combination with one or more other races) 0: No 1: Yes count 30559.000000 mean 0.044308 std 0.205781 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: RACASN, dtype: float64 RACBLK: Black or African American recode (Black alone or in combination with one or more other races) 0: No 1: Yes count 30559.000000 mean 0.497464 std 0.500002 min 0.000000 25% 0.000000 50% 0.000000 75% 1.000000 max 1.000000 Name: RACBLK, dtype: float64 RACNHPI: Native Hawaiian and Other Pacific Islander recode (Native Hawaiian and Other Pacific Islander alone or in combination with one or more other races) 0: 
No 1: Yes count 30559.000000 mean 0.001080 std 0.032844 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: RACNHPI, dtype: float64 RACNUM: Number of major race groups represented 1..6: Race groups count 30559.000000 mean 1.027291 std 0.185123 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 4.000000 Name: RACNUM, dtype: float64 RACSOR: Some other race recode (Some other race alone or in combination with one or more other races) 0: No 1: Yes count 30559.000000 mean 0.027750 std 0.164257 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: RACSOR, dtype: float64 RACWHT: White recode (White alone or in combination with one or more other races) 0: No 1: Yes count 30559.000000 mean 0.447200 std 0.497213 min 0.000000 25% 0.000000 50% 0.000000 75% 1.000000 max 1.000000 Name: RACWHT, dtype: float64 RC: Related child 0: No (includes GQ) 1: Yes count 30559.000000 mean 0.149939 std 0.357018 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: RC, dtype: float64 SCIENGP: Field of degree science and engineering flag - NSF definition b: N/A (less than a bachelor's degree) 1: Yes 2: No ... count 12871.000000 mean 1.479605 std 0.499603 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 2.000000 Name: SCIENGP, dtype: float64 SCIENGRLP: Field of degree science and engineering related flag - NSF definition b: N/A (less than a bachelor's degree) 1: Yes 2: No ... count 12871.000000 mean 1.951674 std 0.214462 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: SCIENGRLP, dtype: float64 SFN: Subfamily number b: N/A (GQ/not in a subfamily) 1: In subfamily 1 2: In subfamily 2 ... count 923 mean 1 std 0 min 1 25% 1 50% 1 75% 1 max 1 Name: SFN, dtype: float64 SFR: Subfamily relationship b: N/A (GQ/not in a subfamily) 1: Husband/wife no children 2: Husband/wife with children ... 
count 923.000000 mean 3.963164 std 1.375627 min 1.000000 25% 3.000000 50% 5.000000 75% 5.000000 max 6.000000 Name: SFR, dtype: float64 SOCP00: SOC Occupation code for data collected in 2009 based on 2000 SOC codes bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 111021: MGR-GENERAL AND OPERATIONS MANAGERS 1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS * ... count 20557 unique 314 top N.A.// freq 16866 Name: SOCP00, dtype: object SOCP10: SOC Occupation code for data collected in 2010 and 2011 based on 2010 SOC codes bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS* 111021: MGR-GENERAL AND OPERATIONS MANAGERS ... count 20557 unique 370 top N.A.// freq 12485 Name: SOCP10, dtype: object SOCP12: SOC Occupation recode for data collected in 2012 or later based on 2010 SOC codes ['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.'] bbbbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked) 1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS * 111021: MGR-GENERAL AND OPERATIONS MANAGERS ... count 20557 unique 357 top N.A.// freq 11763 Name: SOCP12, dtype: object VPS: Veteran period of service bb: N/A (less than 17 years old, no active duty) War Times: 01: Gulf War: 9/2001 or later 02: Gulf War: 9/2001 or later and Gulf War: 8/1990 - 8/2001 ... count 1855.000000 mean 6.997844 std 4.099096 min 1.000000 25% 4.000000 50% 6.000000 75% 11.000000 max 15.000000 Name: VPS, dtype: float64 WAOB: World area of birth **** 1: US state (POB = 001-059) 2: PR and US Island Areas (POB = 060-099) 3: Latin America (POB = 303,310-399) ... 
count 30559.000000 mean 1.457999 std 1.235133 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 8.000000 Name: WAOB, dtype: float64 num columns described = 145 HOUSING RECORD insp: Column not in data dictionary. count 6561.000000 mean 999.282731 std 1085.174484 min 0.000000 25% 370.000000 50% 790.000000 75% 1200.000000 max 8600.000000 Name: insp, dtype: float64 RT: Record Type H: Housing Record or Group Quarters Unit count 17500 unique 1 top H freq 17500 Name: RT, dtype: object SERIALNO: Housing unit/GQ person serial number 2009000000001..2013999999999: Unique identifier count 1.750000e+04 mean 2.011068e+12 std 1.401911e+09 min 2.009000e+12 25% 2.010000e+12 50% 2.011001e+12 75% 2.012001e+12 max 2.013001e+12 Name: SERIALNO, dtype: float64 DIVISION: Division code 0: Puerto Rico 1: New England (Northeast region) 2: Middle Atlantic (Northeast region) ... count 17500 mean 5 std 0 min 5 25% 5 50% 5 75% 5 max 5 Name: DIVISION, dtype: float64 PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10. ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.'] 00100..08200: Public use microdata area codes 77777: Combination of 01801, 01802, and 01905 in Louisiana -0009: Code classification is Not Applicable because data collected in 2012 or later ... count 17500.000000 mean 56.427371 std 55.291036 min -9.000000 25% -9.000000 50% 101.000000 75% 103.000000 max 105.000000 Name: PUMA00, dtype: float64 PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data collected in 2012 or later. 
Use in combination with PUMA00. ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.'] 00100..70301: Public use microdata area codes -0009: Code classification is Not Applicable because data collected prior to 2012 count 17500.000000 mean 37.764171 std 55.358495 min -9.000000 25% -9.000000 50% -9.000000 75% 103.000000 max 105.000000 Name: PUMA10, dtype: float64 REGION: Region code 1: Northeast 2: Midwest 3: South ... count 17500 mean 3 std 0 min 3 25% 3 50% 3 75% 3 max 3 Name: REGION, dtype: float64 ST: State Code 01: Alabama/AL 02: Alaska/AK 04: Arizona/AZ ... count 17500 mean 11 std 0 min 11 25% 11 50% 11 75% 11 max 11 Name: ST, dtype: float64 ADJHSG: Adjustment factor for housing dollar amounts (6 implied decimal places) ['Note: The values of ADJHSG inflation-adjusts reported housing costs to 2013 dollars and applies to variables CONP, ELEP, FULP, GASP, GRNTP, INSP, MHP, MRGP, SMOCP, RNTP, SMP, and WATP in the housing record. ADJHSG does not apply to AGS or TAXP because they are categorical variables that should not be inflation-adjusted.'] 1086032: 2009 factor 1068395: 2010 factor 1035725: 2011 factor ... count 17500.000000 mean 1039364.231657 std 31877.254257 min 1000000.000000 25% 1014531.000000 50% 1035725.000000 75% 1068395.000000 max 1086032.000000 Name: ADJHSG, dtype: float64 ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places) ['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. 
ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars. ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.'] 1085467: 2009 factor (0.999480 * 1.08603175) 1076540: 2010 factor (1.007624 * 1.06839475) 1054614: 2011 factor (1.018237 * 1.03572510) ... count 17500.000000 mean 1048478.770229 std 29598.269890 min 1007549.000000 25% 1024887.000000 50% 1054614.000000 75% 1076540.000000 max 1085467.000000 Name: ADJINC, dtype: float64 WGTP: Housing Weight 0000: Group Quarter placeholder record 00001..09999: Integer weight of housing unit count 17500.000000 mean 17.047257 std 13.878535 min 0.000000 25% 10.000000 50% 14.000000 75% 22.000000 max 172.000000 Name: WGTP, dtype: float64 NP: Number of person records following this housing record 00: Vacant unit 01: One person record (one person in household or any person in group quarters) 02..20: Number of person records (number of persons in household) ... count 17500.000000 mean 1.746229 std 1.291371 min 0.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 13.000000 Name: NP, dtype: float64 TYPE: Type of unit 1: Housing unit 2: Institutional group quarters 3: Noninstitutional group quarters ... count 17500.000000 mean 1.268514 std 0.655686 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 3.000000 Name: TYPE, dtype: float64 ACR: Lot size b: N/A (GQ/not a one-family house or mobile home) 1: House on less than one acre 2: House on one to less than ten acres ... 
count 6388.000000 mean 1.028961 std 0.182034 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 3.000000 Name: ACR, dtype: float64 AGS: Sales of Agriculture Products (Yearly sales) ['Note: No adjustment factor is applied to AGS.'] b: N/A (GQ/vacant/not a one-family house or mobile home/less than 1 acre) 1: None 2: $ 1 - $ 999 ... count 164.000000 mean 1.201220 std 0.934544 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 6.000000 Name: AGS, dtype: float64 BATH: Bathtub or shower b: N/A (GQ) 1: Yes 2: No ... count 14844.000000 mean 1.005322 std 0.072760 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: BATH, dtype: float64 BDSP: Number of bedrooms bb : N/A (GQ) 00..99: 0 to 99 bedrooms (Top-coded) count 14844.000000 mean 2.171989 std 1.452519 min 0.000000 25% 1.000000 50% 2.000000 75% 3.000000 max 14.000000 Name: BDSP, dtype: float64 BLD: Units in structure bb: N/A (GQ) 01: Mobile home or trailer 02: One-family house detached ... count 14844.000000 mean 5.365131 std 2.671220 min 2.000000 25% 3.000000 50% 5.000000 75% 8.000000 max 9.000000 Name: BLD, dtype: float64 BUS: Business or medical office on property b: N/A (GQ/not a one-family house or mobile home) 1: Yes 2: No ... count 6388.000000 mean 1.986381 std 0.115913 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: BUS, dtype: float64 CONP: Condo fee (monthly amount) ['Note: Use values from ADJHSG to adjust CONP to constant dollars.'] bbbb: N/A (GQ/vacant/not owned or being bought) 0000: Not condo 0001..9999: $1 - $9999 (Rounded and top-coded) ... count 14844.000000 mean 51.061035 std 175.491301 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1700.000000 Name: CONP, dtype: float64 ELEP: Electricity (monthly cost) ['Note: Use values from ADJHSG to adjust ELEP values 3 and over to constant dollars.'] bbb: N/A (GQ/vacant) 001: Included in rent or in condo fee 002: No charge or electricity not used ... 
count 13737.000000 mean 92.849967 std 94.873654 min 1.000000 25% 30.000000 50% 70.000000 75% 130.000000 max 570.000000 Name: ELEP, dtype: float64 FS: Yearly food stamp/Supplemental Nutrition Assistance Program recipiency b: N/A (vacant) 1: Yes 2: No ... count 16393.000000 mean 1.874093 std 0.331755 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: FS, dtype: float64 FULP: Fuel cost(yearly cost for fuels other than gas and electricity) ['Note: Use values from ADJHSG to adjust FULP values 3 and over to constant dollars.'] bbbb: N/A (GQ/vacant) 0001: Included in rent or in condo fee 0002: No charge or these fuels not used ... count 13737.000000 mean 45.400306 std 311.391053 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 5200.000000 Name: FULP, dtype: float64 GASP: Gas (monthly cost) ['Note: Use values from ADJHSG to adjust GASP values 4 and over to constant dollars.'] bbb: N/A (GQ/vacant) 001: Included in rent or in condo fee 002: Included in electricity payment ... count 13737.000000 mean 56.089758 std 88.354993 min 1.000000 25% 3.000000 50% 10.000000 75% 80.000000 max 580.000000 Name: GASP, dtype: float64 HFL: House heating fuel b: N/A (GQ/vacant) 1: Utility gas 2: Bottled, tank, or LP gas ... count 13737.000000 mean 1.878867 std 1.306775 min 1.000000 25% 1.000000 50% 1.000000 75% 3.000000 max 9.000000 Name: HFL, dtype: float64 MHP: Mobile home costs (yearly amount) ['Note: Use values from ADJHSG to adjust MHP to constant dollars.'] bbbbb: N/A (GQ/vacant/not owned or being bought/ not mobile home) 00000: None 00001..99999: $1 to $99999 (Rounded and top-coded) ... count 0 mean NaN std NaN min NaN 25% NaN 50% NaN 75% NaN max NaN Name: MHP, dtype: float64 MRGI: First mortgage payment includes fire/hazard/flood insurance b: N/A (GQ/vacant/not owned or being bought/not mortgaged) 1: Yes, insurance included in payment 2: No, insurance paid separately or no insurance ... 
count 4971.000000 mean 1.446590 std 0.497189 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 2.000000 Name: MRGI, dtype: float64 MRGP: First mortgage payment (monthly amount) ['Note: Use values from ADJHSG to adjust MRGP to constant dollars.'] bbbbb: N/A (GQ/vacant/not owned or being bought/not mortgaged) 00001..99999: $1 to $99999 (Rounded and top-coded) count 4971.000000 mean 1936.767250 std 1244.418262 min 10.000000 25% 1000.000000 50% 1700.000000 75% 2500.000000 max 8000.000000 Name: MRGP, dtype: float64 MRGT: First mortgage payment includes real estate taxes b: N/A (GQ/vacant/not owned or being bought/not mortgaged) 1: Yes, taxes included in payment 2: No, taxes paid separately or taxes not required ... count 4971.000000 mean 1.281633 std 0.449841 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 2.000000 Name: MRGT, dtype: float64 MRGX: First mortgage status b: N/A (GQ/vacant/not owned or being bought) 1: Mortgage, deed of trust, or similar debt 2: Contract to purchase ... count 6561.000000 mean 1.490626 std 0.857150 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 3.000000 Name: MRGX, dtype: float64 REFR: Refrigerator b: N/A (GQ) 1: Yes 2: No ... count 14844.000000 mean 1.007949 std 0.088807 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: REFR, dtype: float64 RMSP: Number of Rooms bb: N/A (GQ) 00..99: Rooms (Top-coded) count 14844.000000 mean 5.139854 std 2.895583 min 1.000000 25% 3.000000 50% 4.000000 75% 7.000000 max 24.000000 Name: RMSP, dtype: float64 RNTM: Meals included in rent b: N/A (GQ/not a rental unit/occupied without rent payment) 1: Yes 2: No ... 
count 7373.000000 mean 1.987658 std 0.110416 min 1.000000 25% 2.000000 50% 2.000000 75% 2.000000 max 2.000000 Name: RNTM, dtype: float64 RNTP: Monthly rent ['Note: Use values from ADJHSG to adjust RNTP to constant dollars.'] bbbbb: N/A (GQ/not a rental unit/occupied without rent payment) 00001..99999: $1 to $99999 (Rounded and top-coded) count 7373.000000 mean 1246.782856 std 769.088231 min 4.000000 25% 730.000000 50% 1100.000000 75% 1700.000000 max 3900.000000 Name: RNTP, dtype: float64 RWAT: Hot and cold running water b: N/A (GQ) 1: Yes 2: No ... count 14844.000000 mean 1.007343 std 0.085379 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: RWAT, dtype: float64 RWATPR: Running water b: N/A (GQ) 1: Yes 2: No ... count 14844 mean 9 std 0 min 9 25% 9 50% 9 75% 9 max 9 Name: RWATPR, dtype: float64 SINK: Sink with a faucet b: N/A (GQ) 1: Yes 2: No ... count 14844.000000 mean 1.005457 std 0.073670 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: SINK, dtype: float64 SMP: Total payment on all second and junior mortgages and home equity loans (monthly amount) ['Note: Use ADJHSG to adjust SMP to constant dollars.'] bbbbb: N/A (GQ/vacant/not owned or being bought/ no second or junior mortgages or home equity loans) 00001..99999: $1 to $99999 (Rounded and top-coded) count 1228.000000 mean 506.485342 std 569.513130 min 4.000000 25% 170.000000 50% 350.000000 75% 600.000000 max 4100.000000 Name: SMP, dtype: float64 STOV: Stove or range b: N/A (GQ) 1: Yes 2: No ... count 14844.000000 mean 1.010105 std 0.100018 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: STOV, dtype: float64 TEL: Telephone ['NOTE: Problems in the collection of data on the availability of telephone service (TEL) in 2012 led to suppressing this variable in six PUMAs in Georgia. This only affects 2012 vintage data. 
See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using TEL. http://www.census.gov/acs/www/data_documentation/pums_documentation/'] b: N/A (GQ/vacant) 1: Yes 2: No ... count 13737.000000 mean 1.029701 std 0.169767 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: TEL, dtype: float64 TEN: Tenure b: N/A (GQ/vacant) 1: Owned with mortgage or loan (include home equity loans) 2: Owned free and clear ... count 13737.000000 mean 2.174128 std 0.945958 min 1.000000 25% 1.000000 50% 3.000000 75% 3.000000 max 4.000000 Name: TEN, dtype: float64 TOIL: Flush toilet b: N/A (GQ) 1: Yes 2: No ... count 14844.000000 mean 1.005591 std 0.074569 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: TOIL, dtype: float64 VACS: Vacancy status b: N/A (GQ/occupied) 1: For rent 2: Rented, not occupied ... count 1107.000000 mean 4.214995 std 2.565742 min 1.000000 25% 1.000000 50% 4.000000 75% 7.000000 max 7.000000 Name: VACS, dtype: float64 VALP: Property value bbbbbbb: N/A (GQ/vacant units, except �for-sale-only� and �sold, not occupied�/not owned or being bought) 0000000: $0 (applies to 2009 and 2010 only) 0000001..9999999: $1 to $9999999 (Rounded and top-coded) ... count 6741.000000 mean 576821.155615 std 578742.887940 min 180.000000 25% 290000.000000 50% 410000.000000 75% 700000.000000 max 5303000.000000 Name: VALP, dtype: float64 VEH: Vehicles (1 ton or less) available b: N/A (GQ/vacant) 0: No vehicles 1: 1 vehicle ... count 13737.000000 mean 0.925311 std 0.869523 min 0.000000 25% 0.000000 50% 1.000000 75% 1.000000 max 6.000000 Name: VEH, dtype: float64 WATP: Water (yearly cost) ['Note: Use values from ADJHSG to adjust WATP values 3 and over to constant dollars.'] bbbb: N/A (GQ/vacant) 0001: Included in rent or in condo fee 0002: No charge ... 
count 13737.000000 mean 239.380724 std 412.715268 min 1.000000 25% 1.000000 50% 2.000000 75% 390.000000 max 3900.000000 Name: WATP, dtype: float64 YBL: When structure first built bb: N/A (GQ) 01: 1939 or earlier 02: 1940 to 1949 ... count 14844.000000 mean 3.239289 std 2.768117 min 1.000000 25% 1.000000 50% 2.000000 75% 4.000000 max 16.000000 Name: YBL, dtype: float64 FES: Family type and employment status b: N/A (GQ/vacant/not a family) 1: Married-couple family: Husband and wife in LF 2: Married-couple family: Husband in labor force, wife not in LF ... count 5929.000000 mean 3.998988 std 2.787092 min 1.000000 25% 1.000000 50% 4.000000 75% 7.000000 max 8.000000 Name: FES, dtype: float64 FINCP: Family income (past 12 months) ['Note: Use values from ADJINC to adjust FINCP to constant dollars.'] bbbbbbbb: N/A (GQ/vacant) 00000000: No family income -0059999: Loss of -$59,999 or more ... count 5953.000000 mean 130596.028725 std 154445.776061 min 0.000000 25% 36000.000000 50% 84500.000000 75% 166000.000000 max 2087000.000000 Name: FINCP, dtype: float64 FPARC: Family presence and age of related children b: N/A (GQ/vacant/not a family) 1: With related children under 5 years only 2: With related children 5 to 17 years only ... count 5953.000000 mean 3.076432 std 1.124643 min 1.000000 25% 2.000000 50% 4.000000 75% 4.000000 max 4.000000 Name: FPARC, dtype: float64 GRNTP: Gross rent (monthly amount) ['Note: Use values from ADJHSG to adjust GRNTP to constant dollars.'] bbbbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment) 00001..99999: $1 - $99999 (Components are rounded) count 6989.000000 mean 1349.524109 std 791.201141 min 4.000000 25% 819.000000 50% 1200.000000 75% 1800.000000 max 4510.000000 Name: GRNTP, dtype: float64 GRPIP: Gross rent as a percentage of household income past 12 months bbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment/no household income) 001..100: 1% to 100% 101: 101% or more ... 
count 6805.000000 mean 39.897575 std 28.575415 min 1.000000 25% 20.000000 50% 29.000000 75% 51.000000 max 101.000000 Name: GRPIP, dtype: float64 HHL: Household language b: N/A (GQ/vacant) 1: English only 2: Spanish ... count 13737.000000 mean 1.336245 std 0.850583 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 5.000000 Name: HHL, dtype: float64 HHT: Household/family type b: N/A (GQ/vacant) 1: Married couple household 2: Other family household:Male householder, no wife present ... count 13737.000000 mean 3.805198 std 2.036160 min 1.000000 25% 2.000000 50% 4.000000 75% 6.000000 max 7.000000 Name: HHT, dtype: float64 HINCP: Household income (past 12 months) ['Note: Use values from ADJINC to adjust HINCP to constant dollars.'] bbbbbbbb: N/A(GQ/vacant) 00000000: No household income -0059999: Loss of -$59,999 or more ... count 13737.000000 mean 102051.604353 std 125888.164393 min -13600.000000 25% 29200.000000 50% 67000.000000 75% 128000.000000 max 2087000.000000 Name: HINCP, dtype: float64 HUGCL: Household with grandparent living with grandchildren b: N/A (GQ/vacant) 0: Household without grandparent living with grandchildren 1: Household with grandparent living with grandchildren ... count 13737.000000 mean 0.033559 std 0.180098 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: HUGCL, dtype: float64 HUPAC: HH presence and age of children b: N/A (GQ/vacant) 1: With children under 6 years only 2: With children 6 to 17 years only ... count 13737.000000 mean 3.591541 std 0.888889 min 1.000000 25% 4.000000 50% 4.000000 75% 4.000000 max 4.000000 Name: HUPAC, dtype: float64 HUPAOC: HH presence and age of own children b: N/A (GQ/vacant) 1: Presence of own children under 6 years only 2: Presence of own children 6 to 17 years only ... 
count 13737.000000 mean 3.652617 std 0.836272 min 1.000000 25% 4.000000 50% 4.000000 75% 4.000000 max 4.000000 Name: HUPAOC, dtype: float64 HUPARC: HH presence and age of related children b: N/A (GQ/vacant) 1: Presence of related children under 6 years only 2: Presence of related children 6 to 17 years only ... count 13737.000000 mean 3.594599 std 0.886682 min 1.000000 25% 4.000000 50% 4.000000 75% 4.000000 max 4.000000 Name: HUPARC, dtype: float64 KIT: Complete kitchen facilities b: N/A (GQ) 1: Yes, has stove or range, refrigerator, and sink with a faucet 2: No ... count 14844.000000 mean 1.011587 std 0.107022 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: KIT, dtype: float64 LNGI: Limited English speaking households b: N/A (GQ/vacant) 1: At least one person in the household 14 and over speaks English only or speaks English 'very well' 2: No one in the household 14 and over speaks English only or speaks English 'very well' ... count 13737.000000 mean 1.023368 std 0.151073 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: LNGI, dtype: float64 MULTG: Multigenerational Household b: N/A (GQ/Vacant/NP=0) 1: No, not a multigenerational household 2: Yes, is a multigenerational household ... count 13737.000000 mean 1.032977 std 0.178582 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: MULTG, dtype: float64 MV: When moved into this house or apartment b: N/A (GQ/vacant) 1: 12 months or less 2: 13 to 23 months ... count 13737.000000 mean 3.800466 std 1.936343 min 1.000000 25% 2.000000 50% 4.000000 75% 5.000000 max 7.000000 Name: MV, dtype: float64 NOC: Number of own children in household (unweighted) bb: N/A(GQ/vacant) 00: No own children 01..19: Number of own children in household ... 
count 13737.000000 mean 0.277572 std 0.727938 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 8.000000 Name: NOC, dtype: float64 NPF: Number of persons in family (unweighted) bb: N/A (GQ/vacant/non-family household) 02..20: Number of persons in family count 5953.000000 mean 2.893331 std 1.224969 min 2.000000 25% 2.000000 50% 2.000000 75% 3.000000 max 12.000000 Name: NPF, dtype: float64 NPP: Grandparent headed household with no parent present b: N/A (GQ/vacant) 0: Not a grandparent headed household with no parent present 1: Grandparent headed household with no parent present ... count 13737.000000 mean 0.004586 std 0.067568 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: NPP, dtype: float64 NR: Presence of nonrelative in household b: N/A (GQ/vacant) 0: None 1: 1 or more nonrelatives ... count 13737.000000 mean 0.159132 std 0.365813 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: NR, dtype: float64 NRC: Number of related children in household (unweighted) bb: N/A (GQ/vacant) 00: No related children 01..19: Number of related children in household ... count 13737.000000 mean 0.333552 std 0.806915 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 10.000000 Name: NRC, dtype: float64 OCPIP: Selected monthly owner costs as a percentage of household income during the past 12 months bbb: N/A (GQ/vacant/not owned or being bought/ no household income) 001..100: 1% to 100% 101: 101% or more ... count 6500.000000 mean 26.966923 std 23.259257 min 1.000000 25% 12.000000 50% 20.000000 75% 32.000000 max 101.000000 Name: OCPIP, dtype: float64 PARTNER: Unmarried partner household b: N/A (GQ/vacant) 0: No unmarried partner in household 1: Male householder, male partner ... 
count 13737.000000 mean 0.188032 std 0.751881 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 4.000000 Name: PARTNER, dtype: float64 PLM: Complete plumbing facilities b: N/A (GQ) 1: Yes, has hot and cold running water, a flush toilet, and a bathtub or shower 2: No ... count 14844.000000 mean 1.008690 std 0.092819 min 1.000000 25% 1.000000 50% 1.000000 75% 1.000000 max 2.000000 Name: PLM, dtype: float64 PSF: Presence of subfamilies in Household b: N/A (GQ/vacant) 0: No subfamilies 1: 1 or more subfamilies ... count 13737.000000 mean 0.026789 std 0.161472 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: PSF, dtype: float64 R18: Presence of persons under 18 years in household (unweighted) b: N/A (GQ/vacant) 0: No person under 18 in household 1: 1 or more persons under 18 in household ... count 13737.000000 mean 0.193638 std 0.395163 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 1.000000 Name: R18, dtype: float64 R60: Presence of persons 60 years and over in household (unweighted) b: N/A (GQ/vacant) 0: No person 60 and over 1: 1 person 60 and over ... count 13737.000000 mean 0.415447 std 0.647227 min 0.000000 25% 0.000000 50% 0.000000 75% 1.000000 max 2.000000 Name: R60, dtype: float64 R65: Presence of persons 65 years and over in household (unweighted) b: N/A (GQ/vacant) 0: No person 65 and over 1: 1 person 65 and over ... count 13737.000000 mean 0.297809 std 0.568071 min 0.000000 25% 0.000000 50% 0.000000 75% 0.000000 max 2.000000 Name: R65, dtype: float64 RESMODE: Response mode b: N/A (GQ) 1: Mail 2: CATI/CAPI ... count 14844.000000 mean 1.523579 std 0.657194 min 1.000000 25% 1.000000 50% 1.000000 75% 2.000000 max 3.000000 Name: RESMODE, dtype: float64 SMOCP: Selected monthly owner costs ['Note: Use values from ADJHSG to adjust SMOCP to constant dollars.'] bbbbb: N/A (GQ/vacant/not owned or being bought) 00000: None 00001..99999: $1 - $99999 (Components are rounded) ... 
count 6561.000000 mean 2099.427526 std 1528.415666 min 0.000000 25% 956.000000 50% 1821.000000 75% 2810.000000 max 12113.000000 Name: SMOCP, dtype: float64 SMX: Second or junior mortgage or home equity loan status b: N/A (GQ/vacant/not owned or being bought) 1: Yes, a second mortgage 2: Yes, a home equity loan ... count 4971.000000 mean 2.709113 std 0.597341 min 1.000000 25% 3.000000 50% 3.000000 75% 3.000000 max 4.000000 Name: SMX, dtype: float64 SRNT: Specified rent unit b: N/A 0: Not specified rent unit 1: Specified rent unit ... count 14844.000000 mean 0.509229 std 0.499932 min 0.000000 25% 0.000000 50% 1.000000 75% 1.000000 max 1.000000 Name: SRNT, dtype: float64 SVAL: Specified value owner unit b: N/A 0: Not specified value owner unit 1: Specified value owner unit ... count 14844.000000 mean 0.328887 std 0.469825 min 0.000000 25% 0.000000 50% 0.000000 75% 1.000000 max 1.000000 Name: SVAL, dtype: float64 TAXP: Property taxes (yearly amount) ['Note: No adjustment factor is applied to TAXP.'] bb: N/A (GQ/vacant/not owned or being bought) 01: None 02: $ 1 - $ 49 ... count 6561.000000 mean 36.513032 std 19.481382 min 1.000000 25% 22.000000 50% 33.000000 75% 53.000000 max 68.000000 Name: TAXP, dtype: float64 WIF: Workers in family during the past 12 months b: N/A (GQ/vacant/non-family household) 0: No workers 1: 1 worker ... count 5953.000000 mean 1.432555 std 0.824398 min 0.000000 25% 1.000000 50% 2.000000 75% 2.000000 max 3.000000 Name: WIF, dtype: float64 WKEXREL: Work experience of householder and spouse bb: N/A (GQ/vacant/not a family) 01: Householder and spouse worked FT 02: Householder worked FT; spouse worked < FT ... 
count 5953.000000 mean 7.787166 std 5.420605 min 1.000000 25% 2.000000 50% 8.000000 75% 13.000000 max 15.000000 Name: WKEXREL, dtype: float64 WORKSTAT: Work status of householder or spouse in family households bb: N/A (GQ/not a family household) 01: Husband and wife both in labor force, both employed or in Armed Forces 02: Husband and wife both in labor force, husband employed or in Armed Forces, wife unemployed ... count 5929.000000 mean 7.442570 std 5.594645 min 1.000000 25% 1.000000 50% 9.000000 75% 13.000000 max 15.000000 Name: WORKSTAT, dtype: float64 num columns described = 84
# Load the PUMS estimates published for user verification and keep only the
# rows whose 'state' column is 'District of Columbia'.
print("`dfe`: Estimates for user verification filtered for 'District of Columbia'.")
dfe = pd.read_csv(path_ecsv)
tfmask_dc = dfe['state'].eq('District of Columbia')
dfe_dc = dfe[tfmask_dc]
# Leave the filtered frame as the cell's last expression so the notebook
# renders it.
dfe_dc
`dfe`: Estimates for user verification filtered for 'District of Columbia'.
st | state | characteristic | pums_est_09_to_13 | pums_se_09_to_13 | pums_moe_09_to_13 | |
---|---|---|---|---|---|---|
288 | 11 | District of Columbia | Total population | 619,371 | 0 | 0 |
289 | 11 | District of Columbia | Housing unit population (RELP=0-15) | 579,281 | 0 | 0 |
290 | 11 | District of Columbia | GQ population (RELP=16-17) | 40,090 | 0 | 0 |
291 | 11 | District of Columbia | GQ institutional population (RELP=16) | 7,443 | 80 | 132 |
292 | 11 | District of Columbia | GQ noninstitutional population (RELP=17) | 32,647 | 80 | 132 |
293 | 11 | District of Columbia | Total males (SEX=1) | 292,566 | 361 | 595 |
294 | 11 | District of Columbia | Total females (SEX=2) | 326,805 | 361 | 595 |
295 | 11 | District of Columbia | Age 0-4 | 36,530 | 253 | 417 |
296 | 11 | District of Columbia | Age 5-9 | 27,658 | 636 | 1046 |
297 | 11 | District of Columbia | Age 10-14 | 24,621 | 598 | 984 |
298 | 11 | District of Columbia | Age 15-19 | 40,950 | 825 | 1357 |
299 | 11 | District of Columbia | Age 20-24 | 58,828 | 779 | 1281 |
300 | 11 | District of Columbia | Age 25-34 | 134,025 | 526 | 865 |
301 | 11 | District of Columbia | Age 35-44 | 84,310 | 534 | 878 |
302 | 11 | District of Columbia | Age 45-54 | 75,981 | 435 | 716 |
303 | 11 | District of Columbia | Age 55-59 | 35,191 | 599 | 985 |
304 | 11 | District of Columbia | Age 60-64 | 31,070 | 590 | 970 |
305 | 11 | District of Columbia | Age 65-74 | 38,245 | 295 | 485 |
306 | 11 | District of Columbia | Age 75-84 | 22,283 | 420 | 690 |
307 | 11 | District of Columbia | Age 85 and over | 9,679 | 377 | 619 |
308 | 11 | District of Columbia | Total housing units (TYPE=1) | 298,327 | 113 | 185 |
309 | 11 | District of Columbia | Total occupied units | 263,650 | 965 | 1588 |
310 | 11 | District of Columbia | Owner occupied units (TEN in 1,2) | 110,362 | 1363 | 2242 |
311 | 11 | District of Columbia | Renter occupied units (TEN in 3,4) | 153,288 | 1486 | 2444 |
312 | 11 | District of Columbia | Owned with a mortgage (TEN=1) | 85,483 | 1208 | 1988 |
313 | 11 | District of Columbia | Owned free and clear (TEN=2) | 24,879 | 565 | 929 |
314 | 11 | District of Columbia | Rented for cash (TEN=3) | 149,500 | 1511 | 2485 |
315 | 11 | District of Columbia | No cash rent (TEN=4) | 3,788 | 262 | 431 |
316 | 11 | District of Columbia | Total vacant units | 34,677 | 920 | 1514 |
317 | 11 | District of Columbia | For rent (VACS=1) | 10,686 | 618 | 1017 |
318 | 11 | District of Columbia | For sale only (VACS=3) | 2,953 | 325 | 534 |
319 | 11 | District of Columbia | All Other Vacant (VACS in 2,4,5,6,7) | 21,038 | 849 | 1397 |
print("`dfe`: Verify characteristic estimates, direct standard errors, and margin of error.")
# Verify the estimates following
# https://www.census.gov/programs-surveys/acs/
# technical-documentation/pums/documentation.2013.html
# tech_docs/pums/accuracy/2009_2013AccuracyPUMS.pdf
print()
# Map each record type to its characteristics, and each characteristic to a
# zero-argument callable that returns the boolean row mask selecting it.
# The keys must match the 'characteristic' values in `dfe_dc` exactly.
# Callables (instead of strings passed to `eval`) are parsed together with
# the rest of the cell, yet are still only evaluated when called in the loop.
tfmask_test_funcs = collections.OrderedDict([
    ('PERSON RECORD', collections.OrderedDict([
        ('Total population', lambda: np.ones(len(dfp), dtype=bool)),
        # `Series.between` is inclusive on both ends.
        ('Housing unit population (RELP=0-15)', lambda: dfp['RELP'].between(0, 15)),
        ('GQ population (RELP=16-17)', lambda: dfp['RELP'].between(16, 17)),
        ('GQ institutional population (RELP=16)', lambda: dfp['RELP'] == 16),
        ('GQ noninstitutional population (RELP=17)', lambda: dfp['RELP'] == 17),
        ('Total males (SEX=1)', lambda: dfp['SEX'] == 1),
        ('Total females (SEX=2)', lambda: dfp['SEX'] == 2),
        ('Age 0-4', lambda: dfp['AGEP'].between(0, 4)),
        ('Age 5-9', lambda: dfp['AGEP'].between(5, 9)),
        ('Age 10-14', lambda: dfp['AGEP'].between(10, 14)),
        ('Age 15-19', lambda: dfp['AGEP'].between(15, 19)),
        ('Age 20-24', lambda: dfp['AGEP'].between(20, 24)),
        ('Age 25-34', lambda: dfp['AGEP'].between(25, 34)),
        ('Age 35-44', lambda: dfp['AGEP'].between(35, 44)),
        ('Age 45-54', lambda: dfp['AGEP'].between(45, 54)),
        ('Age 55-59', lambda: dfp['AGEP'].between(55, 59)),
        ('Age 60-64', lambda: dfp['AGEP'].between(60, 64)),
        ('Age 65-74', lambda: dfp['AGEP'].between(65, 74)),
        ('Age 75-84', lambda: dfp['AGEP'].between(75, 84)),
        ('Age 85 and over', lambda: dfp['AGEP'] >= 85)])),
    ('HOUSING RECORD', collections.OrderedDict([
        ('Total housing units (TYPE=1)', lambda: dfh['TYPE'] == 1),
        # A null 'TEN' (tenure) marks a unit without an occupant record,
        # so occupied/vacant are selected by notnull/isnull.
        ('Total occupied units', lambda: dfh['TEN'].notnull()),
        ('Owner occupied units (TEN in 1,2)', lambda: dfh['TEN'].isin([1, 2])),
        ('Renter occupied units (TEN in 3,4)', lambda: dfh['TEN'].isin([3, 4])),
        ('Owned with a mortgage (TEN=1)', lambda: dfh['TEN'] == 1),
        ('Owned free and clear (TEN=2)', lambda: dfh['TEN'] == 2),
        ('Rented for cash (TEN=3)', lambda: dfh['TEN'] == 3),
        ('No cash rent (TEN=4)', lambda: dfh['TEN'] == 4),
        ('Total vacant units', lambda: dfh['TEN'].isnull()),
        ('For rent (VACS=1)', lambda: dfh['VACS'] == 1),
        ('For sale only (VACS=3)', lambda: dfh['VACS'] == 3),
        ('All Other Vacant (VACS in 2,4,5,6,7)',
         lambda: dfh['VACS'].isin([2, 4, 5, 6, 7]))]))])
for record_type in records_dfs:
    print("'{rt}'".format(rt=record_type))
    df = records_dfs[record_type]['dataframe']
    wt = records_dfs[record_type]['weight']
    wts = records_dfs[record_type]['replicate_weights']
    for (char, make_tfmask) in tfmask_test_funcs[record_type].items():
        print(" '{char}'".format(char=char))
        # Select the reference verification data
        # and the records for the characteristic.
        tfmask_ref = dfe_dc['characteristic'] == char
        tfmask_test = make_tfmask()
        # Calculate and verify the estimate ('est') for the characteristic.
        # The estimate is the sum of the sample weights (column `wt`).
        # The reference estimate is a string with thousands separators.
        # Values are printed before each assert so that a failing check
        # still shows the (ref, test) pair that failed.
        col = 'pums_est_09_to_13'
        print(" '{col}':".format(col=col), end=' ')
        ref_est = int(dfe_dc.loc[tfmask_ref, col].values[0].replace(',', ''))
        test_est = df.loc[tfmask_test, wt].sum()
        print("(ref, test) = {tup}".format(tup=(ref_est, test_est)))
        assert np.isclose(ref_est, test_est, rtol=0, atol=1), (
            "'{char}': estimate differs from the reference by more than 1."
            ).format(char=char)
        # Calculate and verify the "direct standard error" ('se') of the estimate.
        # The direct standard error is a modified root-mean-square deviation
        # using the "replicate weights" (columns `wts`): each replicate weight
        # gives its own estimate, and the factor 4/80 is the one used in this
        # notebook's formula for 80 replicates (see the accuracy document
        # referenced at the top of this cell).
        col = 'pums_se_09_to_13'
        print(" '{col}' :".format(col=col), end=' ')
        ref_se = dfe_dc.loc[tfmask_ref, col].values[0]
        replicate_ests = df.loc[tfmask_test, wts].sum()
        test_se = ((4/80)*((replicate_ests - test_est)**2).sum())**0.5
        print("(ref, test) = {tup}".format(tup=(ref_se, test_se)))
        assert np.isclose(ref_se, test_se, rtol=0, atol=1), (
            "'{char}': standard error differs from the reference by more than 1."
            ).format(char=char)
        # Calculate and verify the margin of error ('moe') at the
        # 90% confidence level (+/- 1.645 standard errors).
        col = 'pums_moe_09_to_13'
        print(" '{col}':".format(col=col), end=' ')
        ref_moe = dfe_dc.loc[tfmask_ref, col].values[0]
        test_moe = 1.645*test_se
        print("(ref, test) = {tup}".format(tup=(ref_moe, test_moe)))
        assert np.isclose(ref_moe, test_moe, rtol=0, atol=1), (
            "'{char}': margin of error differs from the reference by more than 1."
            ).format(char=char)
`dfe`: Verify characteristic estimates, direct standard errors, and margin of error. 'PERSON RECORD' 'Total population' 'pums_est_09_to_13': (ref, test) = (619371, 619371) 'pums_se_09_to_13' : (ref, test) = (0, 0.0) 'pums_moe_09_to_13': (ref, test) = (0, 0.0) 'Housing unit population (RELP=0-15)' 'pums_est_09_to_13': (ref, test) = (579281, 579281) 'pums_se_09_to_13' : (ref, test) = (0, 0.0) 'pums_moe_09_to_13': (ref, test) = (0, 0.0) 'GQ population (RELP=16-17)' 'pums_est_09_to_13': (ref, test) = (40090, 40090) 'pums_se_09_to_13' : (ref, test) = (0, 0.0) 'pums_moe_09_to_13': (ref, test) = (0, 0.0) 'GQ institutional population (RELP=16)' 'pums_est_09_to_13': (ref, test) = (7443, 7443) 'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184) 'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871) 'GQ noninstitutional population (RELP=17)' 'pums_est_09_to_13': (ref, test) = (32647, 32647) 'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184) 'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871) 'Total males (SEX=1)' 'pums_est_09_to_13': (ref, test) = (292566, 292566) 'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036) 'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794) 'Total females (SEX=2)' 'pums_est_09_to_13': (ref, test) = (326805, 326805) 'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036) 'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794) 'Age 0-4' 'pums_est_09_to_13': (ref, test) = (36530, 36530) 'pums_se_09_to_13' : (ref, test) = (253, 253.37699185206222) 'pums_moe_09_to_13': (ref, test) = (417, 416.80515159664236) 'Age 5-9' 'pums_est_09_to_13': (ref, test) = (27658, 27658) 'pums_se_09_to_13' : (ref, test) = (636, 635.5916141674621) 'pums_moe_09_to_13': (ref, test) = (1046, 1045.5482053054752) 'Age 10-14' 'pums_est_09_to_13': (ref, test) = (24621, 24621) 'pums_se_09_to_13' : (ref, test) = (598, 598.0936799532328) 'pums_moe_09_to_13': (ref, test) = (984, 983.864103523068) 'Age 15-19' 'pums_est_09_to_13': (ref, 
test) = (40950, 40950) 'pums_se_09_to_13' : (ref, test) = (825, 825.0349386541154) 'pums_moe_09_to_13': (ref, test) = (1357, 1357.18247408602) 'Age 20-24' 'pums_est_09_to_13': (ref, test) = (58828, 58828) 'pums_se_09_to_13' : (ref, test) = (779, 778.715930233869) 'pums_moe_09_to_13': (ref, test) = (1281, 1280.9877052347144) 'Age 25-34' 'pums_est_09_to_13': (ref, test) = (134025, 134025) 'pums_se_09_to_13' : (ref, test) = (526, 525.9921102069878) 'pums_moe_09_to_13': (ref, test) = (865, 865.257021290495) 'Age 35-44' 'pums_est_09_to_13': (ref, test) = (84310, 84310) 'pums_se_09_to_13' : (ref, test) = (534, 533.5205244411877) 'pums_moe_09_to_13': (ref, test) = (878, 877.6412627057538) 'Age 45-54' 'pums_est_09_to_13': (ref, test) = (75981, 75981) 'pums_se_09_to_13' : (ref, test) = (435, 435.0808545546448) 'pums_moe_09_to_13': (ref, test) = (716, 715.7080057423907) 'Age 55-59' 'pums_est_09_to_13': (ref, test) = (35191, 35191) 'pums_se_09_to_13' : (ref, test) = (599, 598.5786915686191) 'pums_moe_09_to_13': (ref, test) = (985, 984.6619476303784) 'Age 60-64' 'pums_est_09_to_13': (ref, test) = (31070, 31070) 'pums_se_09_to_13' : (ref, test) = (590, 589.6810154651412) 'pums_moe_09_to_13': (ref, test) = (970, 970.0252704401572) 'Age 65-74' 'pums_est_09_to_13': (ref, test) = (38245, 38245) 'pums_se_09_to_13' : (ref, test) = (295, 295.0997289053313) 'pums_moe_09_to_13': (ref, test) = (485, 485.43905404927) 'Age 75-84' 'pums_est_09_to_13': (ref, test) = (22283, 22283) 'pums_se_09_to_13' : (ref, test) = (420, 419.69280432239964) 'pums_moe_09_to_13': (ref, test) = (690, 690.3946631103474) 'Age 85 and over' 'pums_est_09_to_13': (ref, test) = (9679, 9679) 'pums_se_09_to_13' : (ref, test) = (377, 376.5637396245156) 'pums_moe_09_to_13': (ref, test) = (619, 619.4473516823282) 'HOUSING RECORD' 'Total housing units (TYPE=1)' 'pums_est_09_to_13': (ref, test) = (298327, 298327) 'pums_se_09_to_13' : (ref, test) = (113, 112.68873058118989) 'pums_moe_09_to_13': (ref, test) = (185, 
185.37296180605736) 'Total occupied units' 'pums_est_09_to_13': (ref, test) = (263650, 263650) 'pums_se_09_to_13' : (ref, test) = (965, 965.0778984102786) 'pums_moe_09_to_13': (ref, test) = (1588, 1587.5531428849083) 'Owner occupied units (TEN in 1,2)' 'pums_est_09_to_13': (ref, test) = (110362, 110362) 'pums_se_09_to_13' : (ref, test) = (1363, 1363.1910174293257) 'pums_moe_09_to_13': (ref, test) = (2242, 2242.449223671241) 'Renter occupied units (TEN in 3,4)' 'pums_est_09_to_13': (ref, test) = (153288, 153288) 'pums_se_09_to_13' : (ref, test) = (1486, 1485.6482760061347) 'pums_moe_09_to_13': (ref, test) = (2444, 2443.8914140300917) 'Owned with a mortgage (TEN=1)' 'pums_est_09_to_13': (ref, test) = (85483, 85483) 'pums_se_09_to_13' : (ref, test) = (1208, 1208.399126944405) 'pums_moe_09_to_13': (ref, test) = (1988, 1987.8165638235462) 'Owned free and clear (TEN=2)' 'pums_est_09_to_13': (ref, test) = (24879, 24879) 'pums_se_09_to_13' : (ref, test) = (565, 565.0110618386157) 'pums_moe_09_to_13': (ref, test) = (929, 929.4431967245227) 'Rented for cash (TEN=3)' 'pums_est_09_to_13': (ref, test) = (149500, 149500) 'pums_se_09_to_13' : (ref, test) = (1511, 1510.8262970970554) 'pums_moe_09_to_13': (ref, test) = (2485, 2485.309258724656) 'No cash rent (TEN=4)' 'pums_est_09_to_13': (ref, test) = (3788, 3788) 'pums_se_09_to_13' : (ref, test) = (262, 262.1715087495207) 'pums_moe_09_to_13': (ref, test) = (431, 431.2721318929615) 'Total vacant units' 'pums_est_09_to_13': (ref, test) = (34677, 34677) 'pums_se_09_to_13' : (ref, test) = (920, 920.3688391074527) 'pums_moe_09_to_13': (ref, test) = (1514, 1514.0067403317596) 'For rent (VACS=1)' 'pums_est_09_to_13': (ref, test) = (10686, 10686) 'pums_se_09_to_13' : (ref, test) = (618, 618.3948172486571) 'pums_moe_09_to_13': (ref, test) = (1017, 1017.2594743740409) 'For sale only (VACS=3)' 'pums_est_09_to_13': (ref, test) = (2953, 2953) 'pums_se_09_to_13' : (ref, test) = (325, 324.77245880770124) 'pums_moe_09_to_13': (ref, test) = (534, 
534.2506947386686) 'All Other Vacant (VACS in 2,4,5,6,7)' 'pums_est_09_to_13': (ref, test) = (21038, 21038) 'pums_se_09_to_13' : (ref, test) = (849, 849.4756618055635) 'pums_moe_09_to_13': (ref, test) = (1397, 1397.3874636701519)
# Export ipynb to html
# Render the notebook once per nbconvert template. Each HTML file is written
# next to the notebook, named '<notebook path without extension>-<template>.html'.
path_stem = os.path.splitext(path_ipynb)[0]
for template in ('basic', 'full'):
    path_html = path_stem+'-'+template+'.html'
    cmd = [
        'jupyter', 'nbconvert',
        '--to', 'html',
        '--template', template,
        path_ipynb,
        '--output', path_html]
    # Echo the command so the notebook output records what was run.
    print(' '.join(cmd))
    # check=True raises CalledProcessError if nbconvert exits non-zero.
    subprocess.run(args=cmd, check=True)
    print()
jupyter nbconvert --to html --template basic /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python.ipynb --output /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python-basic.html jupyter nbconvert --to html --template full /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python.ipynb --output /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python-full.html