{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 20160110-etl-census-with-python\n",
"\n",
"Related post: \n",
"https://stharrold.github.io/20160110-etl-census-with-python.html\n",
"\n",
"Data documentation: \n",
"https://www.census.gov/programs-surveys/acs/technical-documentation/pums/documentation.2013.html"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/samuel_harrold\n"
]
}
],
"source": [
"cd ~"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Import standard packages.\n",
"import collections\n",
"import functools\n",
"import os\n",
"import pdb # Debug with pdb.\n",
"import subprocess\n",
"import sys\n",
"import time\n",
"# Import installed packages.\n",
"import numpy as np\n",
"import pandas as pd\n",
"# Import local packages.\n",
"# Insert current directory into module search path.\n",
"# Autoreload local packages after editing.\n",
"# `dsdemos` version: https://github.com/stharrold/dsdemos/releases/tag/v0.0.3\n",
"sys.path.insert(0, os.path.join(os.path.curdir, r'dsdemos'))\n",
"%reload_ext autoreload\n",
"%autoreload 2\n",
"import dsdemos as dsd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Timestamp:\n",
"2016-02-08T04:30:52GMT\n",
"\n",
"Versions:\n",
"Python: sys.version_info(major=3, minor=5, micro=1, releaselevel='final', serial=0)\n",
"numpy: 1.10.2\n",
"pandas: 0.17.1\n"
]
}
],
"source": [
"print(\"Timestamp:\")\n",
"print(time.strftime(r'%Y-%m-%dT%H:%M:%S%Z', time.gmtime()))\n",
"print()\n",
"print(\"Versions:\")\n",
"print(\"Python:\", sys.version_info)\n",
"print(\"numpy:\", np.__version__)\n",
"print(\"pandas:\", pd.__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"### Globals\n",
"\n",
"File sources:\n",
"* 2013 5-year PUMS data dictionary: [PUMS_Data_Dictionary_2009-2013.txt](http://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2009-2013.txt) (<1 MB)\n",
"* 2013 5-year PUMS person and housing records for Washington DC:\n",
" * Person records: [csv_pdc.zip](http://www2.census.gov/programs-surveys/acs/data/pums/2013/5-Year/csv_pdc.zip) (5 MB compressed, 30 MB decompressed)\n",
" * Housing records: [csv_hdc.zip](http://www2.census.gov/programs-surveys/acs/data/pums/2013/5-Year/csv_hdc.zip) (2 MB compressed, 13 MB decompressed)\n",
"* 2013 5-year PUMS estimates for user verification: [pums_estimates_9_13.csv](http://www2.census.gov/programs-surveys/acs/tech_docs/pums/estimates/pums_estimates_9_13.csv) (<1 MB)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# File paths\n",
"path_static = os.path.join(os.path.expanduser(r'~'), r'stharrold.github.io/content/static')\n",
"basename = r'20160110-etl-census-with-python'\n",
"filename = basename\n",
"path_ipynb = os.path.join(path_static, basename, filename+'.ipynb')\n",
"path_disk = os.path.abspath(r'/mnt/disk-20151227t211000z/')\n",
"path_acs = os.path.join(path_disk, r'www2-census-gov/programs-surveys/acs/')\n",
"path_pcsv = os.path.join(path_acs, r'data/pums/2013/5-Year/ss13pdc.csv') # 'pdc' = 'person DC'\n",
"path_hcsv = os.path.join(path_acs, r'data/pums/2013/5-Year/ss13hdc.csv') # 'hdc' = 'housing DC'\n",
"path_ecsv = os.path.join(path_acs, r'tech_docs/pums/estimates/pums_estimates_9_13.csv')\n",
"path_dtxt = os.path.join(path_acs, r'tech_docs/pums/data_dict/PUMS_Data_Dictionary_2009-2013.txt')\n",
"\n",
"# Weights\n",
"pwt = 'PWGTP' # person weight\n",
"pwts = [pwt+str(inum) for inum in range(1, 81)]\n",
"hwt = 'WGTP' # housing weight\n",
"hwts = [hwt+str(inum) for inum in range(1, 81)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extract, transform, and load"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data dictionary"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`ddict`: Load the data dictionary and display the hierarchical structure.\n"
]
},
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" length | \n",
" description | \n",
" var_codes | \n",
" notes | \n",
"
\n",
" \n",
" record_type | \n",
" var_name | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" HOUSING RECORD | \n",
" ACR | \n",
" 1 | \n",
" Lot size | \n",
" {'b': 'N/A (GQ/not a one-family house or mobil... | \n",
" NaN | \n",
"
\n",
" \n",
" ADJHSG | \n",
" 7 | \n",
" Adjustment factor for housing dollar amounts (... | \n",
" {'1086032': '2009 factor', '1068395': '2010 fa... | \n",
" [Note: The values of ADJHSG inflation-adjusts ... | \n",
"
\n",
" \n",
" ADJINC | \n",
" 7 | \n",
" Adjustment factor for income and earnings doll... | \n",
" {'1085467': '2009 factor (0.999480 * 1.0860317... | \n",
" [Note: The values of ADJINC inflation-adjusts ... | \n",
"
\n",
" \n",
" AGS | \n",
" 1 | \n",
" Sales of Agriculture Products (Yearly sales) | \n",
" {'b': 'N/A (GQ/vacant/not a one-family house o... | \n",
" [Note: No adjustment factor is applied to AGS.] | \n",
"
\n",
" \n",
" BATH | \n",
" 1 | \n",
" Bathtub or shower | \n",
" {'b': 'N/A (GQ)', '1': 'Yes', '2': 'No'} | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" length \\\n",
"record_type var_name \n",
"HOUSING RECORD ACR 1 \n",
" ADJHSG 7 \n",
" ADJINC 7 \n",
" AGS 1 \n",
" BATH 1 \n",
"\n",
" description \\\n",
"record_type var_name \n",
"HOUSING RECORD ACR Lot size \n",
" ADJHSG Adjustment factor for housing dollar amounts (... \n",
" ADJINC Adjustment factor for income and earnings doll... \n",
" AGS Sales of Agriculture Products (Yearly sales) \n",
" BATH Bathtub or shower \n",
"\n",
" var_codes \\\n",
"record_type var_name \n",
"HOUSING RECORD ACR {'b': 'N/A (GQ/not a one-family house or mobil... \n",
" ADJHSG {'1086032': '2009 factor', '1068395': '2010 fa... \n",
" ADJINC {'1085467': '2009 factor (0.999480 * 1.0860317... \n",
" AGS {'b': 'N/A (GQ/vacant/not a one-family house o... \n",
" BATH {'b': 'N/A (GQ)', '1': 'Yes', '2': 'No'} \n",
"\n",
" notes \n",
"record_type var_name \n",
"HOUSING RECORD ACR NaN \n",
" ADJHSG [Note: The values of ADJHSG inflation-adjusts ... \n",
" ADJINC [Note: The values of ADJINC inflation-adjusts ... \n",
" AGS [Note: No adjustment factor is applied to AGS.] \n",
" BATH NaN "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"`ddict`: Load the data dictionary and display the hierarchical structure.\")\n",
"# Only `ddict` is used below.\n",
"# The hierarchical data frame is only for display. \n",
"ddict = dsd.census.parse_pumsdatadict(path=path_dtxt)\n",
"tmp = dict()\n",
"for record_type in ddict['record_types']:\n",
" tmp[record_type] = pd.DataFrame.from_dict(ddict['record_types'][record_type], orient='index')\n",
"pd.concat(tmp, names=['record_type', 'var_name']).head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`ddict`: First 10 unstructured notes from end of file.\n"
]
},
{
"data": {
"text/plain": [
"['* In cases where the SOC occupation code ends in X(s) or Y(s), two or more SOC',\n",
" 'occupation codes were aggregated to correspond to a specific Census occupation',\n",
" 'code. In these cases, the Census occupation description is used for the SOC',\n",
" 'occupation title.\"',\n",
" '** These codes are pseudo codes developed by the Census Bureau and are not',\n",
" ' official or equivalent NAICS or SOC codes.',\n",
" 'Legend to Identify NAICS Equivalents',\n",
" ' M = Multiple NAICS codes',\n",
" ' P = Part of a NAICS code - NAICS code split between two or more Census',\n",
" ' codes']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"`ddict`: First 10 unstructured notes from end of file.\")\n",
"ddict['notes'][:10]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### PUMS data"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`dfp`, `dfh`: Load person and housing records.\n",
"/mnt/disk-20151227t211000z/www2-census-gov/programs-surveys/acs/data/pums/2013/5-Year/ss13pdc.csv:\n",
" size (MB) = 30.5\n",
" num lines = 30560\n",
" num columns = 295\n",
"/mnt/disk-20151227t211000z/www2-census-gov/programs-surveys/acs/data/pums/2013/5-Year/ss13hdc.csv:\n",
" size (MB) = 13.5\n",
" num lines = 17501\n",
" num columns = 205\n",
"\n",
"dfp RAM usage (MB) = 72.1\n",
"dfh RAM usage (MB) = 28.7\n",
"\n",
"Time elapsed (sec) = 2.0\n"
]
}
],
"source": [
"print(\"`dfp`, `dfh`: Load person and housing records.\")\n",
"time_start = time.perf_counter()\n",
"for path in [path_pcsv, path_hcsv]:\n",
" with open(path) as fobj:\n",
" nlines = sum(1 for _ in fobj)\n",
" with open(path) as fobj:\n",
" first_line = fobj.readline()\n",
" ncols = first_line.count(',')+1\n",
" print(\"{path}:\".format(path=path))\n",
" print(\" size (MB) = {size:.1f}\".format(size=os.path.getsize(path)/1e6))\n",
" print(\" num lines = {nlines}\".format(nlines=nlines))\n",
" print(\" num columns = {ncols}\".format(ncols=ncols))\n",
"print()\n",
"\n",
"# For ss13pdc.csv, low_memory=False since otherwise pandas raises DtypeWarning.\n",
"dfp = pd.read_csv(path_pcsv, low_memory=False)\n",
"dfh = pd.read_csv(path_hcsv, low_memory=True)\n",
"for (name, df) in [('dfp', dfp), ('dfh', dfh)]:\n",
" print(\"{name} RAM usage (MB) = {mem:.1f}\".format(\n",
" name=name, mem=df.memory_usage().sum()/1e6))\n",
"time_stop = time.perf_counter()\n",
"print()\n",
"print(\"Time elapsed (sec) = {diff:.1f}\".format(diff=time_stop-time_start))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`dfp`: First 5 person records.\n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" RT | \n",
" SERIALNO | \n",
" SPORDER | \n",
" PUMA00 | \n",
" PUMA10 | \n",
" ST | \n",
" ADJINC | \n",
" PWGTP | \n",
" AGEP | \n",
" CIT | \n",
" ... | \n",
" PWGTP71 | \n",
" PWGTP72 | \n",
" PWGTP73 | \n",
" PWGTP74 | \n",
" PWGTP75 | \n",
" PWGTP76 | \n",
" PWGTP77 | \n",
" PWGTP78 | \n",
" PWGTP79 | \n",
" PWGTP80 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" P | \n",
" 2009000000403 | \n",
" 1 | \n",
" 102 | \n",
" -9 | \n",
" 11 | \n",
" 1085467 | \n",
" 20 | \n",
" 38 | \n",
" 1 | \n",
" ... | \n",
" 6 | \n",
" 26 | \n",
" 31 | \n",
" 32 | \n",
" 26 | \n",
" 6 | \n",
" 36 | \n",
" 6 | \n",
" 19 | \n",
" 20 | \n",
"
\n",
" \n",
" 1 | \n",
" P | \n",
" 2009000001113 | \n",
" 1 | \n",
" 103 | \n",
" -9 | \n",
" 11 | \n",
" 1085467 | \n",
" 13 | \n",
" 78 | \n",
" 1 | \n",
" ... | \n",
" 13 | \n",
" 30 | \n",
" 12 | \n",
" 13 | \n",
" 4 | \n",
" 4 | \n",
" 18 | \n",
" 24 | \n",
" 4 | \n",
" 21 | \n",
"
\n",
" \n",
" 2 | \n",
" P | \n",
" 2009000001113 | \n",
" 2 | \n",
" 103 | \n",
" -9 | \n",
" 11 | \n",
" 1085467 | \n",
" 25 | \n",
" 39 | \n",
" 1 | \n",
" ... | \n",
" 26 | \n",
" 50 | \n",
" 23 | \n",
" 20 | \n",
" 8 | \n",
" 7 | \n",
" 38 | \n",
" 41 | \n",
" 7 | \n",
" 37 | \n",
"
\n",
" \n",
" 3 | \n",
" P | \n",
" 2009000001113 | \n",
" 3 | \n",
" 103 | \n",
" -9 | \n",
" 11 | \n",
" 1085467 | \n",
" 17 | \n",
" 8 | \n",
" 1 | \n",
" ... | \n",
" 15 | \n",
" 32 | \n",
" 17 | \n",
" 15 | \n",
" 6 | \n",
" 4 | \n",
" 26 | \n",
" 32 | \n",
" 5 | \n",
" 30 | \n",
"
\n",
" \n",
" 4 | \n",
" P | \n",
" 2009000001978 | \n",
" 1 | \n",
" 103 | \n",
" -9 | \n",
" 11 | \n",
" 1085467 | \n",
" 37 | \n",
" 53 | \n",
" 1 | \n",
" ... | \n",
" 65 | \n",
" 12 | \n",
" 13 | \n",
" 37 | \n",
" 36 | \n",
" 41 | \n",
" 57 | \n",
" 36 | \n",
" 11 | \n",
" 33 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 295 columns
\n",
"
"
],
"text/plain": [
" RT SERIALNO SPORDER PUMA00 PUMA10 ST ADJINC PWGTP AGEP CIT \\\n",
"0 P 2009000000403 1 102 -9 11 1085467 20 38 1 \n",
"1 P 2009000001113 1 103 -9 11 1085467 13 78 1 \n",
"2 P 2009000001113 2 103 -9 11 1085467 25 39 1 \n",
"3 P 2009000001113 3 103 -9 11 1085467 17 8 1 \n",
"4 P 2009000001978 1 103 -9 11 1085467 37 53 1 \n",
"\n",
" ... PWGTP71 PWGTP72 PWGTP73 PWGTP74 PWGTP75 PWGTP76 PWGTP77 \\\n",
"0 ... 6 26 31 32 26 6 36 \n",
"1 ... 13 30 12 13 4 4 18 \n",
"2 ... 26 50 23 20 8 7 38 \n",
"3 ... 15 32 17 15 6 4 26 \n",
"4 ... 65 12 13 37 36 41 57 \n",
"\n",
" PWGTP78 PWGTP79 PWGTP80 \n",
"0 6 19 20 \n",
"1 24 4 21 \n",
"2 41 7 37 \n",
"3 32 5 30 \n",
"4 36 11 33 \n",
"\n",
"[5 rows x 295 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"`dfp`: First 5 person records.\")\n",
"dfp.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`dfp`: First 5 housing records.\n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" insp | \n",
" RT | \n",
" SERIALNO | \n",
" DIVISION | \n",
" PUMA00 | \n",
" PUMA10 | \n",
" REGION | \n",
" ST | \n",
" ADJHSG | \n",
" ADJINC | \n",
" ... | \n",
" WGTP71 | \n",
" WGTP72 | \n",
" WGTP73 | \n",
" WGTP74 | \n",
" WGTP75 | \n",
" WGTP76 | \n",
" WGTP77 | \n",
" WGTP78 | \n",
" WGTP79 | \n",
" WGTP80 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 600 | \n",
" H | \n",
" 2009000000403 | \n",
" 5 | \n",
" 102 | \n",
" -9 | \n",
" 3 | \n",
" 11 | \n",
" 1086032 | \n",
" 1085467 | \n",
" ... | \n",
" 6 | \n",
" 25 | \n",
" 30 | \n",
" 32 | \n",
" 26 | \n",
" 6 | \n",
" 36 | \n",
" 6 | \n",
" 18 | \n",
" 19 | \n",
"
\n",
" \n",
" 1 | \n",
" NaN | \n",
" H | \n",
" 2009000001113 | \n",
" 5 | \n",
" 103 | \n",
" -9 | \n",
" 3 | \n",
" 11 | \n",
" 1086032 | \n",
" 1085467 | \n",
" ... | \n",
" 14 | \n",
" 29 | \n",
" 12 | \n",
" 12 | \n",
" 4 | \n",
" 4 | \n",
" 18 | \n",
" 23 | \n",
" 4 | \n",
" 22 | \n",
"
\n",
" \n",
" 2 | \n",
" 480 | \n",
" H | \n",
" 2009000001978 | \n",
" 5 | \n",
" 103 | \n",
" -9 | \n",
" 3 | \n",
" 11 | \n",
" 1086032 | \n",
" 1085467 | \n",
" ... | \n",
" 65 | \n",
" 12 | \n",
" 14 | \n",
" 37 | \n",
" 36 | \n",
" 41 | \n",
" 57 | \n",
" 36 | \n",
" 11 | \n",
" 34 | \n",
"
\n",
" \n",
" 3 | \n",
" NaN | \n",
" H | \n",
" 2009000002250 | \n",
" 5 | \n",
" 105 | \n",
" -9 | \n",
" 3 | \n",
" 11 | \n",
" 1086032 | \n",
" 1085467 | \n",
" ... | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 23 | \n",
" 14 | \n",
" 11 | \n",
" 4 | \n",
" 20 | \n",
" 21 | \n",
"
\n",
" \n",
" 4 | \n",
" 2500 | \n",
" H | \n",
" 2009000002985 | \n",
" 5 | \n",
" 101 | \n",
" -9 | \n",
" 3 | \n",
" 11 | \n",
" 1086032 | \n",
" 1085467 | \n",
" ... | \n",
" 66 | \n",
" 45 | \n",
" 10 | \n",
" 35 | \n",
" 34 | \n",
" 10 | \n",
" 34 | \n",
" 55 | \n",
" 50 | \n",
" 10 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 205 columns
\n",
"
"
],
"text/plain": [
" insp RT SERIALNO DIVISION PUMA00 PUMA10 REGION ST ADJHSG \\\n",
"0 600 H 2009000000403 5 102 -9 3 11 1086032 \n",
"1 NaN H 2009000001113 5 103 -9 3 11 1086032 \n",
"2 480 H 2009000001978 5 103 -9 3 11 1086032 \n",
"3 NaN H 2009000002250 5 105 -9 3 11 1086032 \n",
"4 2500 H 2009000002985 5 101 -9 3 11 1086032 \n",
"\n",
" ADJINC ... WGTP71 WGTP72 WGTP73 WGTP74 WGTP75 WGTP76 WGTP77 \\\n",
"0 1085467 ... 6 25 30 32 26 6 36 \n",
"1 1085467 ... 14 29 12 12 4 4 18 \n",
"2 1085467 ... 65 12 14 37 36 41 57 \n",
"3 1085467 ... 4 4 4 4 23 14 11 \n",
"4 1085467 ... 66 45 10 35 34 10 34 \n",
"\n",
" WGTP78 WGTP79 WGTP80 \n",
"0 6 18 19 \n",
"1 23 4 22 \n",
"2 36 11 34 \n",
"3 4 20 21 \n",
"4 55 50 10 \n",
"\n",
"[5 rows x 205 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"`dfp`: First 5 housing records.\")\n",
"dfh.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags.\n",
"Printed format:\n",
"[PERSON, HOUSING] RECORD\n",
"COL: Column name.\n",
" Column description.\n",
" Multi-line optional column notes.\n",
" 1-3 line description of value meanings ('variable codes').\n",
" Multi-line statistical description and data type.\n",
"...\n",
"num columns described = ncols\n",
"\n",
"PERSON RECORD\n",
"RT: Record Type\n",
" P: Person Record\n",
" count 30559\n",
" unique 1\n",
" top P\n",
" freq 30559\n",
" Name: RT, dtype: object\n",
"SERIALNO: Housing unit/GQ person serial number\n",
" 200900000001..201399999999: Unique identifier\n",
" count 3.055900e+04\n",
" mean 2.011081e+12\n",
" std 1.407751e+09\n",
" min 2.009000e+12\n",
" 25% 2.010000e+12\n",
" 50% 2.011001e+12\n",
" 75% 2.012001e+12\n",
" max 2.013001e+12\n",
" Name: SERIALNO, dtype: float64\n",
"SPORDER: Person number\n",
" 01..20: Person number\n",
" count 30559.000000\n",
" mean 1.850584\n",
" std 1.235291\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 13.000000\n",
" Name: SPORDER, dtype: float64\n",
"PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10.\n",
" ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']\n",
" 00100..08200: Public use microdata area codes\n",
" 7777: combination of 01801, 01802, and 01905 in Louisiana\n",
" -0009: Code classification is Not Applicable for data collected in 2012 or later\n",
" ...\n",
" count 30559.000000\n",
" mean 55.840243\n",
" std 55.336541\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 101.000000\n",
" 75% 103.000000\n",
" max 105.000000\n",
" Name: PUMA00, dtype: float64\n",
"PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data Collected in 2012 or later. Use in combination with PUMA00.\n",
" ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']\n",
" 00100..70301: Public use microdata area codes\n",
" -0009: Code classification is Not Applicable for data collected prior to 2012\n",
" count 30559.000000\n",
" mean 38.259923\n",
" std 55.395391\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 103.000000\n",
" max 105.000000\n",
" Name: PUMA10, dtype: float64\n",
"ST: State Code\n",
" 01: Alabama/AL\n",
" 02: Alaska/AK\n",
" 04: Arizona/AZ\n",
" ...\n",
" count 30559\n",
" mean 11\n",
" std 0\n",
" min 11\n",
" 25% 11\n",
" 50% 11\n",
" 75% 11\n",
" max 11\n",
" Name: ST, dtype: float64\n",
"ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places)\n",
" ['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars. ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.']\n",
" 1085467: 2009 factor (0.999480 * 1.08603175)\n",
" 1076540: 2010 factor (1.007624 * 1.06839475)\n",
" 1054614: 2011 factor (1.018237 * 1.03572510)\n",
" ...\n",
" count 30559.000000\n",
" mean 1048186.138192\n",
" std 29716.696630\n",
" min 1007549.000000\n",
" 25% 1024887.000000\n",
" 50% 1054614.000000\n",
" 75% 1076540.000000\n",
" max 1085467.000000\n",
" Name: ADJINC, dtype: float64\n",
"PWGTP: Person's weight\n",
" 00001..09999: Integer weight of person\n",
" count 30559.000000\n",
" mean 20.268039\n",
" std 13.310075\n",
" min 1.000000\n",
" 25% 12.000000\n",
" 50% 16.000000\n",
" 75% 24.000000\n",
" max 173.000000\n",
" Name: PWGTP, dtype: float64\n",
"AGEP: Age\n",
" 00: Under 1 year\n",
" 01..99: 1 to 99 years (Top-coded***)\n",
" count 30559.000000\n",
" mean 38.728198\n",
" std 21.780122\n",
" min 0.000000\n",
" 25% 23.000000\n",
" 50% 35.000000\n",
" 75% 55.000000\n",
" max 95.000000\n",
" Name: AGEP, dtype: float64\n",
"CIT: Citizenship status\n",
" 1: Born in the U.S.\n",
" 2: Born in Puerto Rico, Guam, the U.S. Virgin Islands, or the Northern Marianas\n",
" 3: Born abroad of American parent(s)\n",
" ...\n",
" count 30559.000000\n",
" mean 1.471252\n",
" std 1.201267\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 5.000000\n",
" Name: CIT, dtype: float64\n",
"CITWP05: Year of naturalization write-in for data collected prior to 2012\n",
" bbbb: Not eligible - not naturalized\n",
" 1925: 1925 or earlier (Bottom-coded)\n",
" 1926: 1926 - 1930\n",
" ...\n",
" count 1595.000000\n",
" mean 1110.813166\n",
" std 994.495059\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 1973.000000\n",
" 75% 1999.000000\n",
" max 2011.000000\n",
" Name: CITWP05, dtype: float64\n",
"CITWP12: Year of naturalization write-in for data collected in 2012 or later\n",
" bbbb: Not eligible - not naturalized\n",
" 1928: 1928 or earlier (Bottom-coded)\n",
" 1929: 1929 - 1933\n",
" ...\n",
" count 1595.000000\n",
" mean 875.462696\n",
" std 996.639591\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 1999.000000\n",
" max 2013.000000\n",
" Name: CITWP12, dtype: float64\n",
"COW: Class of worker\n",
" b: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 1: Employee of a private for-profit company or business, or of an individual, for wages, salary, or commissions\n",
" 2: Employee of a private not-for-profit, tax-exempt, or charitable organization\n",
" ...\n",
" count 20557.000000\n",
" mean 2.592937\n",
" std 1.971727\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 5.000000\n",
" max 9.000000\n",
" Name: COW, dtype: float64\n",
"DDRS: Self-care difficulty\n",
" b: N/A (Less than 5 years old)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 29078.000000\n",
" mean 1.969668\n",
" std 0.171503\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DDRS, dtype: float64\n",
"DEAR: Hearing difficulty\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.978010\n",
" std 0.146654\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DEAR, dtype: float64\n",
"DEYE: Vision difficulty\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.975163\n",
" std 0.155631\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DEYE, dtype: float64\n",
"DOUT: Independent living difficulty\n",
" b: N/A (Less than 15 years old)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 26658.000000\n",
" mean 1.943432\n",
" std 0.231020\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DOUT, dtype: float64\n",
"DPHY: Ambulatory difficulty\n",
" b: N/A (Less than 5 years old)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 29078.000000\n",
" mean 1.922966\n",
" std 0.266650\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DPHY, dtype: float64\n",
"DRAT: Veteran service connected disability rating (percentage)\n",
" b: N/A (No service-connected disability/never served in military)\n",
" 1: 0 percent\n",
" 2: 10 or 20 percent\n",
" ...\n",
" count 287.000000\n",
" mean 3.400697\n",
" std 1.587671\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 3.000000\n",
" 75% 5.000000\n",
" max 6.000000\n",
" Name: DRAT, dtype: float64\n",
"DRATX: Veteran service connected disability rating (checkbox)\n",
" b: N/A (Less than 17 years old/never served in military)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 2072.000000\n",
" mean 1.861486\n",
" std 0.345522\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DRATX, dtype: float64\n",
"DREM: Cognitive difficulty\n",
" b: N/A (Less than 5 years old)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 29078.000000\n",
" mean 1.943015\n",
" std 0.231817\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DREM, dtype: float64\n",
"ENG: Ability to speak English\n",
" b: N/A (less than 5 years old/speaks only English)\n",
" 1: Very well\n",
" 2: Well\n",
" ...\n",
" count 4231.000000\n",
" mean 1.429449\n",
" std 0.750207\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 4.000000\n",
" Name: ENG, dtype: float64\n",
"FER: Gave birth to child within the past 12 months\n",
" ['NOTE: Problems in the collection of data on women who gave birth in the past year (FER) in 2012 led to suppressing this variable in 59 PUMAs within states Florida, Georgia, Kansas, Montana, North Carolina, Ohio and Texas. This only affects 2012 vintage data. See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using FER. http://www.census.gov/acs/www/data_documentation/pums_documentation/']\n",
" b: N/A (less than 15 years/greater than 50 years/ male)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 9036.000000\n",
" mean 1.955069\n",
" std 0.207165\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: FER, dtype: float64\n",
"GCL: Grandparents living with grandchildren\n",
" b: N/A (less than 30 years/institutional GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 18439.000000\n",
" mean 1.968599\n",
" std 0.174403\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: GCL, dtype: float64\n",
"GCM: Length of time responsible for grandchildren\n",
" b: N/A (less than 30 years/grandparent not responsible for grandchild/institutional GQ)\n",
" 1: Less than 6 months\n",
" 2: 6 to 11 months\n",
" ...\n",
" count 228.000000\n",
" mean 4.026316\n",
" std 1.078100\n",
" min 1.000000\n",
" 25% 3.000000\n",
" 50% 4.000000\n",
" 75% 5.000000\n",
" max 5.000000\n",
" Name: GCM, dtype: float64\n",
"GCR: Grandparents responsible for grandchildren\n",
" b: N/A (less than 30 years/institutional GQ/grandparent not living with grandchild)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 579.000000\n",
" mean 1.606218\n",
" std 0.489010\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: GCR, dtype: float64\n",
"HINS1: Insurance through a current or former employer or union\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.387120\n",
" std 0.487099\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: HINS1, dtype: float64\n",
"HINS2: Insurance purchased directly from an insurance company\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.852548\n",
" std 0.354562\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: HINS2, dtype: float64\n",
"HINS3: Medicare, for people 65 and older, or people with certain disabilities\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.847999\n",
" std 0.359028\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: HINS3, dtype: float64\n",
"HINS4: Medicaid, Medical Assistance, or any kind of government-assistance plan for those with low incomes or a disability\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.763016\n",
" std 0.425239\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: HINS4, dtype: float64\n",
"HINS5: TRICARE or other military health care\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.973461\n",
" std 0.160734\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: HINS5, dtype: float64\n",
"HINS6: VA (including those who have ever used or enrolled for VA health care)\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.981675\n",
" std 0.134127\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: HINS6, dtype: float64\n",
"HINS7: Indian Health Service\n",
" 1: Yes\n",
" 2: No\n",
" count 30559.000000\n",
" mean 1.999215\n",
" std 0.028014\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: HINS7, dtype: float64\n",
"INTP: Interest, dividends, and net rental income past 12 months (signed)\n",
" ['Note: Use values from ADJINC to adjust INTP to constant dollars.']\n",
" bbbbbb: N/A (less than 15 years old)\n",
" 000000: None\n",
" -09999..-00001: Loss $1 to $9999 (Rounded and bottom-coded)\n",
" ...\n",
" count 26658.000000\n",
" mean 2798.324368\n",
" std 18916.559752\n",
" min -7700.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 325000.000000\n",
" Name: INTP, dtype: float64\n",
"JWMNP: Travel time to work\n",
" bbb: N/A (not a worker or worker who worked at home)\n",
" 001..200: 1 to 200 minutes to get to work (Top-coded)\n",
" count 14545.000000\n",
" mean 29.764043\n",
" std 19.584350\n",
" min 1.000000\n",
" 25% 15.000000\n",
" 50% 30.000000\n",
" 75% 40.000000\n",
" max 142.000000\n",
" Name: JWMNP, dtype: float64\n",
"JWRIP: Vehicle occupancy\n",
" bb: N/A (not a worker or worker whose means of transportation to work was not car, truck, or van)\n",
" 01: Drove alone\n",
" 02: In 2-person carpool\n",
" ...\n",
" count 6211.000000\n",
" mean 1.224602\n",
" std 0.677173\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 10.000000\n",
" Name: JWRIP, dtype: float64\n",
"JWTR: Means of transportation to work\n",
" bb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job but not at work; Armed Forces, with a job but not at work)\n",
" 01: Car, truck, or van\n",
" 02: Bus or trolley bus\n",
" ...\n",
" count 15327.000000\n",
" mean 3.863900\n",
" std 3.554906\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 4.000000\n",
" max 12.000000\n",
" Name: JWTR, dtype: float64\n",
"LANX: Language other than English spoken at home\n",
" b: N/A (less than 5 years old)\n",
" 1: Yes, speaks another language\n",
" 2: No, speaks only English\n",
" ...\n",
" count 29078.000000\n",
" mean 1.854495\n",
" std 0.352616\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: LANX, dtype: float64\n",
"MAR: Marital status\n",
" 1: Married\n",
" 2: Widowed\n",
" 3: Divorced\n",
" ...\n",
" count 30559.000000\n",
" mean 3.659118\n",
" std 1.737333\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 5.000000\n",
" 75% 5.000000\n",
" max 5.000000\n",
" Name: MAR, dtype: float64\n",
"MARHD: Divorced in the past 12 months\n",
" b: N/A (age less than 15 years; never married)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 12371.000000\n",
" mean 1.982297\n",
" std 0.131874\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: MARHD, dtype: float64\n",
"MARHM: Married in the past 12 months\n",
" b: N/A (age less than 15 years; never married)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 12371.000000\n",
" mean 1.954086\n",
" std 0.209307\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: MARHM, dtype: float64\n",
"MARHT: Number of times married\n",
" b: N/A (age less than 15 years; never married)\n",
" 1: One time\n",
" 2: Two times\n",
" ...\n",
" count 12371.000000\n",
" mean 1.207259\n",
" std 0.461325\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 3.000000\n",
" Name: MARHT, dtype: float64\n",
"MARHW: Widowed in the past 12 months\n",
" b: N/A (age less than 15 years; never married)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 12371.000000\n",
" mean 1.990866\n",
" std 0.095140\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: MARHW, dtype: float64\n",
"MARHYP05: Year last married for data collected prior to 2012\n",
" bbbb: N/A (age less than 15 years; never married)\n",
" 1928: 1928 or earlier (Bottom-coded)\n",
" 1929: 1929\n",
" ...\n",
" count 12371.000000\n",
" mean 1158.447337\n",
" std 982.950909\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 1963.000000\n",
" 75% 1994.000000\n",
" max 2011.000000\n",
" Name: MARHYP05, dtype: float64\n",
"MARHYP12: Year last married for data collected in 2012 or later\n",
" bbbb: N/A (age less than 15 years; never married)\n",
" 1932: 1932 or earlier (Bottom-coded)\n",
" 1933: 1933\n",
" ...\n",
" count 12371.000000\n",
" mean 819.956026\n",
" std 984.800070\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 1987.000000\n",
" max 2013.000000\n",
" Name: MARHYP12, dtype: float64\n",
"MIG: Mobility status (lived here 1 year ago)\n",
" b: N/A (less than 1 year old)\n",
" 1: Yes, same house (nonmovers)\n",
" 2: No, outside US and Puerto Rico\n",
" ...\n",
" count 30229.00000\n",
" mean 1.37163\n",
" std 0.76873\n",
" min 1.00000\n",
" 25% 1.00000\n",
" 50% 1.00000\n",
" 75% 1.00000\n",
" max 3.00000\n",
" Name: MIG, dtype: float64\n",
"MIL: Military service\n",
" b: N/A (less than 17 years old)\n",
" 1: Now on active duty\n",
" 2: On active duty in the past, but not now\n",
" ...\n",
" count 26114.000000\n",
" mean 3.842843\n",
" std 0.549228\n",
" min 1.000000\n",
" 25% 4.000000\n",
" 50% 4.000000\n",
" 75% 4.000000\n",
" max 4.000000\n",
" Name: MIL, dtype: float64\n",
"MLPA: Served September 2001 or later\n",
" b: N/A (Less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.224798\n",
" std 0.417562\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPA, dtype: float64\n",
"MLPB: Served August 1990 - August 2001 (including Persian Gulf War)\n",
" b: N/A (Less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.161186\n",
" std 0.367802\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPB, dtype: float64\n",
"MLPCD: Served May 1975 - July 1990\n",
" b: N/A (less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.232345\n",
" std 0.422442\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPCD, dtype: float64\n",
"MLPE: Served Vietnam era (August 1964 - April 1975)\n",
" b: N/A (Less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.290027\n",
" std 0.453897\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 1.000000\n",
" max 1.000000\n",
" Name: MLPE, dtype: float64\n",
"MLPFG: Served February 1955 - July 1964\n",
" b: N/A (less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.145013\n",
" std 0.352209\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPFG, dtype: float64\n",
"MLPH: Served Korean War (July 1950 - January 1955)\n",
" b: N/A (Less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.117520\n",
" std 0.322126\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPH, dtype: float64\n",
"MLPI: Served January 1947 - June 1950\n",
" b: N/A (Less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.017790\n",
" std 0.132222\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPI, dtype: float64\n",
"MLPJ: Served World War II (December 1941 - December 1946)\n",
" b: N/A (Less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.086253\n",
" std 0.280814\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPJ, dtype: float64\n",
"MLPK: Served November 1941 or earlier\n",
" b: N/A (Less than 17 years old/no active duty)\n",
" 0: Did not serve this period\n",
" 1: Served this period\n",
" ...\n",
" count 1855.000000\n",
" mean 0.003774\n",
" std 0.061330\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: MLPK, dtype: float64\n",
"NWAB: Temporary absence from work (UNEDITED-See \"Employment Status Recode\" (ESR))\n",
" b: N/A (less than 16 years old/at work/on layoff)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 26399.000000\n",
" mean 2.564074\n",
" std 0.529434\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 3.000000\n",
" 75% 3.000000\n",
" max 3.000000\n",
" Name: NWAB, dtype: float64\n",
"NWAV: Available for work (UNEDITED-See \"Employment Status Recode\" (ESR))\n",
" b: N/A (less than 16 years/at work/not looking)\n",
" 1: Yes\n",
" 2: No, temporarily ill\n",
" ...\n",
" count 26399.000000\n",
" mean 4.530702\n",
" std 1.224743\n",
" min 1.000000\n",
" 25% 5.000000\n",
" 50% 5.000000\n",
" 75% 5.000000\n",
" max 5.000000\n",
" Name: NWAV, dtype: float64\n",
"NWLA: On layoff from work (UNEDITED-See \"Employment Status Recode\" (ESR))\n",
" b: N/A (less than 16 years old/at work)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 26399.000000\n",
" mean 2.540134\n",
" std 0.524830\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 3.000000\n",
" 75% 3.000000\n",
" max 3.000000\n",
" Name: NWLA, dtype: float64\n",
"NWLK: Looking for work (UNEDITED-See \"Employment Status Recode\" (ESR))\n",
" b: N/A (less than 16 years old/at work/temporarily absent/informed of recall)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 26399.000000\n",
" mean 2.503315\n",
" std 0.635689\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 3.000000\n",
" 75% 3.000000\n",
" max 3.000000\n",
" Name: NWLK, dtype: float64\n",
"NWRE: Informed of recall (UNEDITED-See \"Employment Status Recode\" (ESR))\n",
" b: N/A (less than 16 years old/at work/not on layoff)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 26399.000000\n",
" mean 2.903974\n",
" std 0.315014\n",
" min 1.000000\n",
" 25% 3.000000\n",
" 50% 3.000000\n",
" 75% 3.000000\n",
" max 3.000000\n",
" Name: NWRE, dtype: float64\n",
"OIP: All other income past 12 months\n",
" ['Note: Use values from ADJINC to adjust OIP to constant dollars.']\n",
" bbbbbb: N/A (less than 15 years old)\n",
" 000000: None\n",
" 000001..999999: $1 to $999999 (Rounded and top-coded)\n",
" ...\n",
" count 26658.000000\n",
" mean 675.345037\n",
" std 4722.241622\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 83000.000000\n",
" Name: OIP, dtype: float64\n",
"PAP: Public assistance income past 12 months\n",
" ['Note: Use values from ADJINC to adjust PAP to constant dollars.']\n",
" bbbbb: N/A (less than 15 years old)\n",
" 00000: None\n",
" 00001..99999: $1 to $99999 (Rounded)\n",
" ...\n",
" count 26658.000000\n",
" mean 76.790832\n",
" std 692.300350\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 25400.000000\n",
" Name: PAP, dtype: float64\n",
"RELP: Relationship\n",
" 00: Reference person\n",
" 01: Husband/wife\n",
" 02: Biological son or daughter\n",
" ...\n",
" count 30559.000000\n",
" mean 3.656795\n",
" std 5.569053\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 1.000000\n",
" 75% 5.000000\n",
" max 17.000000\n",
" Name: RELP, dtype: float64\n",
"RETP: Retirement income past 12 months\n",
" ['Note: Use values from ADJINC to adjust RETP to constant dollars.']\n",
" bbbbbb: N/A (less than 15 years old)\n",
" 000000: None\n",
" 000001..999999: $1 to $999999 (Rounded and top-coded)\n",
" ...\n",
" count 26658.000000\n",
" mean 3493.095881\n",
" std 15552.960973\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 249000.000000\n",
" Name: RETP, dtype: float64\n",
"SCH: School enrollment\n",
" b: N/A (less than 3 years old)\n",
" 1: No, has not attended in the last 3 months\n",
" 2: Yes, public school or public college\n",
" ...\n",
" count 29645.000000\n",
" mean 1.376050\n",
" std 0.691228\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 3.000000\n",
" Name: SCH, dtype: float64\n",
"SCHG: Grade level attending\n",
" bb: N/A (not attending school)\n",
" 01: Nursery school/preschool\n",
" 02: Kindergarten\n",
" ...\n",
" count 7544.00000\n",
" mean 11.28526\n",
" std 5.07951\n",
" min 1.00000\n",
" 25% 7.00000\n",
" 50% 14.00000\n",
" 75% 15.00000\n",
" max 16.00000\n",
" Name: SCHG, dtype: float64\n",
"SCHL: Educational attainment\n",
" bb: N/A (less than 3 years old)\n",
" 01: No schooling completed\n",
" 02: Nursery school, preschool\n",
" ...\n",
" count 29645.000000\n",
" mean 17.468173\n",
" std 5.585758\n",
" min 1.000000\n",
" 25% 16.000000\n",
" 50% 19.000000\n",
" 75% 21.000000\n",
" max 24.000000\n",
" Name: SCHL, dtype: float64\n",
"SEMP: Self-employment income past 12 months (signed)\n",
" ['Note: Use values from ADJINC to adjust SEMP to constant dollars.']\n",
" bbbbbb: N/A (less than 15 years old)\n",
" 000000: None\n",
" -10000..-00001: Loss $1 to $10000 (Rounded and bottom-coded)\n",
" ...\n",
" count 26658.000000\n",
" mean 2956.785243\n",
" std 30447.719592\n",
" min -9100.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 727000.000000\n",
" Name: SEMP, dtype: float64\n",
"SEX: Sex\n",
" 1: Male\n",
" 2: Female\n",
" count 30559.000000\n",
" mean 1.537878\n",
" std 0.498571\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: SEX, dtype: float64\n",
"SSIP: Supplementary Security Income past 12 months\n",
" ['Note: Use values from ADJINC to adjust SSIP to constant dollars.']\n",
" bbbbb: N/A (less than 15 years old)\n",
" 00000: None\n",
" 00001..99999: $1 to $99999 (Rounded)\n",
" ...\n",
" count 26658.000000\n",
" mean 296.556381\n",
" std 1651.630937\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 30000.000000\n",
" Name: SSIP, dtype: float64\n",
"SSP: Social Security income past 12 months\n",
" ['Note: Use values from ADJINC to adjust SSP to constant dollars.']\n",
" bbbbb: N/A (less than 15 years old)\n",
" 00000: None\n",
" 00001..99999: $1 to $99999 (Rounded)\n",
" ...\n",
" count 26658.000000\n",
" mean 1618.331458\n",
" std 4844.120790\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 50000.000000\n",
" Name: SSP, dtype: float64\n",
"WAGP: Wages or salary income past 12 months\n",
" ['Note: Use values from ADJINC to adjust WAGP to constant dollars.']\n",
" bbbbbb: N/A (less than 15 years old)\n",
" 000000: None\n",
" 000001..999999: $1 to 999999 (Rounded and top-coded)\n",
" ...\n",
" count 26658.000000\n",
" mean 41347.736139\n",
" std 69993.911285\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 15000.000000\n",
" 75% 60000.000000\n",
" max 660000.000000\n",
" Name: WAGP, dtype: float64\n",
"WKHP: Usual hours worked per week past 12 months\n",
" bb: N/A (less than 16 years old/did not work during the past 12 months)\n",
" 01..98: 1 to 98 usual hours\n",
" 99: 99 or more usual hours\n",
" ...\n",
" count 17950.000000\n",
" mean 39.674485\n",
" std 13.046600\n",
" min 1.000000\n",
" 25% 38.000000\n",
" 50% 40.000000\n",
" 75% 45.000000\n",
" max 99.000000\n",
" Name: WKHP, dtype: float64\n",
"WKL: When last worked\n",
" b: N/A (less than 16 years old)\n",
" 1: Within the past 12 months\n",
" 2: 1-5 years ago\n",
" ...\n",
" count 26399.000000\n",
" mean 1.550930\n",
" std 0.842136\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 3.000000\n",
" Name: WKL, dtype: float64\n",
"WKW: Weeks worked during past 12 months\n",
" b: N/A (less than 16 years old/did not work during the past 12 months)\n",
" 1: 50 to 52 weeks\n",
" 2: 48 to 49 weeks\n",
" ...\n",
" count 17950.000000\n",
" mean 1.952201\n",
" std 1.698859\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 3.000000\n",
" max 6.000000\n",
" Name: WKW, dtype: float64\n",
"WRK: Worked last week\n",
" b: N/A (not reported)\n",
" 1: Worked\n",
" 2: Did not work\n",
" ...\n",
" count 24046.000000\n",
" mean 1.390086\n",
" std 0.487779\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: WRK, dtype: float64\n",
"YOEP05: Year of entry for data collected prior to 2012\n",
" bbbb: Not eligible - Born in the US\n",
" 1919: 1919 or earlier (Bottom-coded)\n",
" 1920: 1920\n",
" ...\n",
" count 4268.000000\n",
" mean 1126.390112\n",
" std 990.940648\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 1970.000000\n",
" 75% 1997.000000\n",
" max 2011.000000\n",
" Name: YOEP05, dtype: float64\n",
"YOEP12: Year of entry for data collected in 2012 or later\n",
" bbbb: Not eligible - Born in the US\n",
" 1921: 1921 or earlier (Bottom-coded)\n",
" 1922: 1922 - 1923\n",
" ...\n",
" count 4268.000000\n",
" mean 856.887769\n",
" std 992.461884\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 1995.000000\n",
" max 2013.000000\n",
" Name: YOEP12, dtype: float64\n",
"ANC: Ancestry recode\n",
" 1: Single\n",
" 2: Multiple\n",
" 3: Unclassified\n",
" ...\n",
" count 30559.000000\n",
" mean 1.555810\n",
" std 0.943709\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 4.000000\n",
" Name: ANC, dtype: float64\n",
"ANC1P05: Recoded Detailed Ancestry for data collected prior to 2012 - first entry\n",
" 001: Alsatian\n",
" 003: Austrian\n",
" 005: Basque\n",
" ...\n",
" count 30559.000000\n",
" mean 339.916359\n",
" std 420.806625\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 50.000000\n",
" 75% 902.000000\n",
" max 999.000000\n",
" Name: ANC1P05, dtype: float64\n",
"ANC1P12: Recoded Detailed Ancestry for data collected in 2012 or later - first entry\n",
" 001: Alsatian\n",
" 003: Austrian\n",
" 005: Basque\n",
" ...\n",
" count 30559.000000\n",
" mean 251.538041\n",
" std 397.645131\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 615.000000\n",
" max 999.000000\n",
" Name: ANC1P12, dtype: float64\n",
"ANC2P05: Recoded Detailed Ancestry for data collected prior to 2012 - second entry\n",
" 001: Alsatian\n",
" 003: Austrian\n",
" 005: Basque\n",
" ...\n",
" count 30559.000000\n",
" mean 478.113944\n",
" std 494.015425\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 125.000000\n",
" 75% 999.000000\n",
" max 999.000000\n",
" Name: ANC2P05, dtype: float64\n",
"ANC2P12: Recoded Detailed Ancestry for data collected in 2012 or later - second entry\n",
" 001: Alsatian\n",
" 003: Austrian\n",
" 005: Basque\n",
" ...\n",
" count 30559.000000\n",
" mean 346.208515\n",
" std 473.692419\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 999.000000\n",
" max 999.000000\n",
" Name: ANC2P12, dtype: float64\n",
"DECADE: Decade of entry\n",
" b: N/A (Born in the US)\n",
" 1: Before 1950\n",
" 2: 1950 - 1959\n",
" ...\n",
" count 4268.000000\n",
" mean 5.698454\n",
" std 1.477502\n",
" min 1.000000\n",
" 25% 5.000000\n",
" 50% 6.000000\n",
" 75% 7.000000\n",
" max 7.000000\n",
" Name: DECADE, dtype: float64\n",
"DIS: Disability recode\n",
" 1: With a disability\n",
" 2: Without a disability\n",
" count 30559.000000\n",
" mean 1.871527\n",
" std 0.334621\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: DIS, dtype: float64\n",
"DRIVESP: Number of vehicles calculated from JWRI\n",
" b: N/A (Nonworker or worker who does not drive to work)\n",
" 1: 1.000 vehicles (Drove alone)\n",
" 2: 0.500 vehicles (In a 2-person carpool)\n",
" ...\n",
" count 6211.000000\n",
" mean 1.217517\n",
" std 0.608995\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 6.000000\n",
" Name: DRIVESP, dtype: float64\n",
"ESP: Employment status of parents\n",
" b: N/A (not own child of householder, and not child in subfamily) Living with two parents:\n",
" 1: Both parents in labor force\n",
" 2: Father only in labor force\n",
" ...\n",
" count 4318.000000\n",
" mean 4.348541\n",
" std 2.889376\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 5.000000\n",
" 75% 7.000000\n",
" max 8.000000\n",
" Name: ESP, dtype: float64\n",
"ESR: Employment status recode\n",
" b: N/A (less than 16 years old)\n",
" 1: Civilian employed, at work\n",
" 2: Civilian employed, with a job but not at work\n",
" ...\n",
" count 26399.000000\n",
" mean 2.880071\n",
" std 2.322380\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 6.000000\n",
" max 6.000000\n",
" Name: ESR, dtype: float64\n",
"FOD1P: Recoded field of degree - first entry\n",
" bbbb: N/A (less than bachelor's degree)\n",
" 1100: GENERAL AGRICULTURE\n",
" 1101: AGRICULTURE PRODUCTION AND MANAGEMENT\n",
" ...\n",
" count 12871.000000\n",
" mean 4518.419470\n",
" std 1596.571638\n",
" min 1100.000000\n",
" 25% 3301.000000\n",
" 50% 5404.000000\n",
" 75% 5507.000000\n",
" max 6403.000000\n",
" Name: FOD1P, dtype: float64\n",
"FOD2P: Recoded field of degree - second entry\n",
" bbbb: N/A (less than bachelor's degree)\n",
" 1100: GENERAL AGRICULTURE\n",
" 1101: AGRICULTURE PRODUCTION AND MANAGEMENT\n",
" ...\n",
" count 2230.000000\n",
" mean 4427.388789\n",
" std 1610.279474\n",
" min 1101.000000\n",
" 25% 2602.000000\n",
" 50% 5200.000000\n",
" 75% 5506.000000\n",
" max 6403.000000\n",
" Name: FOD2P, dtype: float64\n",
"HICOV: Health insurance coverage recode\n",
" 1: With health insurance coverage\n",
" 2: No health insurance coverage\n",
" count 30559.00000\n",
" mean 1.05815\n",
" std 0.23403\n",
" min 1.00000\n",
" 25% 1.00000\n",
" 50% 1.00000\n",
" 75% 1.00000\n",
" max 2.00000\n",
" Name: HICOV, dtype: float64\n",
"HISP: Recoded detailed Hispanic origin\n",
" 01: Not Spanish/Hispanic/Latino\n",
" 02: Mexican\n",
" 03: Puerto Rican\n",
" ...\n",
" count 30559.000000\n",
" mean 1.666808\n",
" std 2.950180\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 24.000000\n",
" Name: HISP, dtype: float64\n",
"INDP: Industry recode based on 2012 IND codes\n",
" ['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on industry groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']\n",
" bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 0170: AGR-CROP PRODUCTION\n",
" 0180: AGR-ANIMAL PRODUCTION AND AQUACULTURE\n",
" ...\n",
" count 20557.000000\n",
" mean 7685.842584\n",
" std 1845.245220\n",
" min 170.000000\n",
" 25% 7270.000000\n",
" 50% 7870.000000\n",
" 75% 9160.000000\n",
" max 9920.000000\n",
" Name: INDP, dtype: float64\n",
"JWAP: Time of arrival at work - hour and minute\n",
" bbb: N/A (not a worker; worker who worked at home)\n",
" 001: 12:00 a.m. to 12:04 a.m.\n",
" 002: 12:05 a.m. to 12:09 a.m.\n",
" ...\n",
" count 14545.000000\n",
" mean 106.071640\n",
" std 33.565316\n",
" min 1.000000\n",
" 25% 91.000000\n",
" 50% 100.000000\n",
" 75% 109.000000\n",
" max 284.000000\n",
" Name: JWAP, dtype: float64\n",
"JWDP: Time of departure for work - hour and minute\n",
" bbb: N/A (not a worker; worker who worked at home)\n",
" 001: 12:00 a.m. to 12:29 a.m.\n",
" 002: 12:30 a.m. to 12:59 a.m.\n",
" ...\n",
" count 14545.000000\n",
" mean 57.893022\n",
" std 22.927208\n",
" min 1.000000\n",
" 25% 43.000000\n",
" 50% 55.000000\n",
" 75% 64.000000\n",
" max 150.000000\n",
" Name: JWDP, dtype: float64\n",
"LANP05: Language spoken at home for data collected prior to 2012\n",
" bbb: N/A (less than 5 years old/speaks only English)\n",
" 601: Jamaican Creole\n",
" 607: German\n",
" ...\n",
" count 4231.000000\n",
" mean 362.528717\n",
" std 336.415892\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 620.000000\n",
" 75% 625.000000\n",
" max 994.000000\n",
" Name: LANP05, dtype: float64\n",
"LANP12: Language spoken at home for data collected in 2012 or later\n",
" bbb: N/A (less than 5 years old/speaks only English)\n",
" 601: Jamaican Creole\n",
" 602: Krio\n",
" ...\n",
" count 4231.000000\n",
" mean 290.464429\n",
" std 337.743570\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 625.000000\n",
" max 994.000000\n",
" Name: LANP12, dtype: float64\n",
"MIGPUMA00: Migration PUMA based on Census 2000 definition for data collected prior to 2012\n",
" bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago)\n",
" 00001: Did not live in the United States or in Puerto Rico one year ago\n",
" 00002: Lived in Puerto Rico one year ago and current residence is in the U.S.\n",
" ...\n",
" count 5832.000000\n",
" mean 465.729767\n",
" std 1096.576145\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 100.000000\n",
" 75% 100.000000\n",
" max 8100.000000\n",
" Name: MIGPUMA00, dtype: float64\n",
"MIGPUMA10: Migration PUMA based on 2010 Census definition for data collected in 2012 or later\n",
" bbbbb: N/A (person less than 1 year old/lived in same house 1 year ago)\n",
" 00001: Did not live in the United States or in Puerto Rico one year ago\n",
" 00002: Lived in Puerto Rico one year ago and current residence is in the U.S.\n",
" ...\n",
" count 5832.000000\n",
" mean 1528.504630\n",
" std 7819.009818\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 100.000000\n",
" max 59300.000000\n",
" Name: MIGPUMA10, dtype: float64\n",
"MIGSP05: Migration recode for data collected prior to 2012 - State or foreign country code\n",
" bbb: N/A (person less than 1 year old/lived in same house 1 year ago)\n",
" 001: Alabama/AL\n",
" 002: Alaska/AK\n",
" ...\n",
" count 5832.000000\n",
" mean 16.364026\n",
" std 57.178932\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 11.000000\n",
" 75% 12.000000\n",
" max 554.000000\n",
" Name: MIGSP05, dtype: float64\n",
"MIGSP12: Migration recode for data collected in 2012 or later - State or foreign country code\n",
" bbb: N/A (person less than 1 year old/lived in same house 1 year ago)\n",
" 001: Alabama/AL\n",
" 002: Alaska/AK\n",
" ...\n",
" count 5832.000000\n",
" mean 11.033951\n",
" std 50.483716\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 11.000000\n",
" max 555.000000\n",
" Name: MIGSP12, dtype: float64\n",
"MSP: Married, spouse present/spouse absent\n",
" b: N/A (age less than 15 years)\n",
" 1: Now married, spouse present\n",
" 2: Now married, spouse absent\n",
" ...\n",
" count 26658.000000\n",
" mean 4.202491\n",
" std 2.152624\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 6.000000\n",
" 75% 6.000000\n",
" max 6.000000\n",
" Name: MSP, dtype: float64\n",
"NAICSP: NAICS Industry code based on 2012 NAICS codes\n",
" ['NOTE: Changes were made to this variable between the 2008-2012 and 2009-2013 5-year PUMS files. For additional information on NAICS groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']\n",
" bbbbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 111 : AGR-CROP PRODUCTION\n",
" 112 : AGR-ANIMAL PRODUCTION AND AQUACULTURE\n",
" ...\n",
" count 20557\n",
" unique 223\n",
" top 722Z\n",
" freq 993\n",
" Name: NAICSP, dtype: object\n",
"NATIVITY: Nativity\n",
" 1: Native\n",
" 2: Foreign born\n",
" count 30559.000000\n",
" mean 1.123237\n",
" std 0.328714\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: NATIVITY, dtype: float64\n",
"NOP: Nativity of parent\n",
" b: N/A (greater than 17 years old/not an own child of householder, and not child in subfamily)\n",
" 1: Living with two parents: Both parents NATIVE\n",
" 2: Living with two parents: Father only FOREIGN BORN\n",
" ...\n",
" count 4312.000000\n",
" mean 4.474954\n",
" std 2.696136\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 5.000000\n",
" 75% 7.000000\n",
" max 8.000000\n",
" Name: NOP, dtype: float64\n",
"OC: Own child\n",
" 0: No (includes GQ)\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.124775\n",
" std 0.330469\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: OC, dtype: float64\n",
"OCCP02: Occupation recode for data collected in 2009 based on 2002 OCC codes\n",
" bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS\n",
" 0020: MGR-GENERAL AND OPERATIONS MANAGERS\n",
" ...\n",
" count 20557\n",
" unique 314\n",
" top N.A.\n",
" freq 16866\n",
" Name: OCCP02, dtype: object\n",
"OCCP10: Occupation recode for data collected in 2010 and 2011 based on 2010 OCC codes\n",
" bbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS\n",
" 0020: MGR-GENERAL AND OPERATIONS MANAGERS\n",
" ...\n",
" count 20557\n",
" unique 370\n",
" top N.A.\n",
" freq 12485\n",
" Name: OCCP10, dtype: object\n",
"OCCP12: Occupation recode for data collected in 2012 or later based on 2010 OCC codes\n",
" ['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']\n",
" bbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 0010: MGR-CHIEF EXECUTIVES AND LEGISLATORS\n",
" 0020: MGR-GENERAL AND OPERATIONS MANAGERS\n",
" ...\n",
" count 20557\n",
" unique 357\n",
" top N.A.\n",
" freq 11763\n",
" Name: OCCP12, dtype: object\n",
"PAOC: Presence and age of own children\n",
" b: N/A (male/female under 16 years old/GQ)\n",
" 1: Females with own children under 6 years only\n",
" 2: Females with own children 6 to 17 years only\n",
" ...\n",
" count 13083.000000\n",
" mean 3.616372\n",
" std 0.875946\n",
" min 1.000000\n",
" 25% 4.000000\n",
" 50% 4.000000\n",
" 75% 4.000000\n",
" max 4.000000\n",
" Name: PAOC, dtype: float64\n",
"PERNP: Total person's earnings\n",
" ['Note: Use values from ADJINC to adjust PERNP to constant dollars.']\n",
" bbbbbbb: N/A (less than 15 years old)\n",
" 0000000: No earnings\n",
" -010000: Loss of $10000 or more (Rounded & bottom-coded components)\n",
" ...\n",
" count 26399.000000\n",
" mean 44739.192053\n",
" std 77239.316348\n",
" min -9100.000000\n",
" 25% 0.000000\n",
" 50% 20000.000000\n",
" 75% 62000.000000\n",
" max 1360000.000000\n",
" Name: PERNP, dtype: float64\n",
"PINCP: Total person's income (signed)\n",
" ['Note: Use values from ADJINC to adjust PINCP to constant dollars.']\n",
" bbbbbbb: N/A (less than 15 years old)\n",
" 0000000: None\n",
" -019999: Loss of $19999 or more (Rounded & bottom-coded components)\n",
" ...\n",
" count 26658.000000\n",
" mean 53262.965339\n",
" std 82561.741382\n",
" min -13600.000000\n",
" 25% 7200.000000\n",
" 50% 30000.000000\n",
" 75% 70000.000000\n",
" max 1471000.000000\n",
" Name: PINCP, dtype: float64\n",
"POBP05: Place of birth (Recode) for data collected prior to 2012\n",
" 001: Alabama/AL\n",
" 002: Alaska/AK\n",
" 004: Arizona/AZ\n",
" ...\n",
" count 30559.000000\n",
" mean 29.560948\n",
" std 80.011666\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 11.000000\n",
" 75% 31.000000\n",
" max 554.000000\n",
" Name: POBP05, dtype: float64\n",
"POBP12: Place of birth (Recode) for data collected in 2012 or later\n",
" 001: Alabama/AL\n",
" 002: Alaska/AK\n",
" 004: Arizona/AZ\n",
" ...\n",
" count 30559.000000\n",
" mean 19.925554\n",
" std 72.939454\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 12.000000\n",
" max 515.000000\n",
" Name: POBP12, dtype: float64\n",
"POVPIP: Income-to-poverty ratio recode\n",
" bbb: N/A\n",
" 000..500: Below 501 percent\n",
" 501: 501 percent or more\n",
" ...\n",
" count 28378.000000\n",
" mean 331.494045\n",
" std 181.116198\n",
" min 0.000000\n",
" 25% 158.000000\n",
" 50% 398.000000\n",
" 75% 501.000000\n",
" max 501.000000\n",
" Name: POVPIP, dtype: float64\n",
"POWPUMA00: Place of work PUMA based on Census 2000 definition for data collected prior to 2012\n",
" bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work)\n",
" 00001: Did not work in the United States or in Puerto Rico\n",
" 00100..08200: Assigned Place of work PUMA. Use with POWSP05.\n",
" ...\n",
" count 15327.000000\n",
" mean 144.907810\n",
" std 345.593937\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 100.000000\n",
" 75% 100.000000\n",
" max 6890.000000\n",
" Name: POWPUMA00, dtype: float64\n",
"POWPUMA10: Place of work PUMA based on 2010 Census definition for data collected in 2012 or later\n",
" bbbbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; civilian employed, with a job not at work; Armed Forces, with a job but not at work)\n",
" 00001: Did not work in the United States or in Puerto Rico\n",
" 00100..70100: Assigned Place of work PUMA. Use with POWSP12.\n",
" ...\n",
" count 15327.000000\n",
" mean 1563.770470\n",
" std 8888.575098\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 100.000000\n",
" max 59300.000000\n",
" Name: POWPUMA10, dtype: float64\n",
"POWSP05: Place of work for data collected prior to 2012 - State or foreign country recode\n",
" bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work)\n",
" 001: Alabama/AL\n",
" 002: Alaska/AK\n",
" ...\n",
" count 15327.000000\n",
" mean 6.471978\n",
" std 18.985725\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 11.000000\n",
" 75% 11.000000\n",
" max 555.000000\n",
" Name: POWSP05, dtype: float64\n",
"POWSP12: Place of work for data collected in 2012 or later - State or foreign country recode\n",
" bbb: N/A (not a worker--not in the labor force, including persons under 16 years; unemployed; employed, with a job not at work; Armed Forces, with a job but not at work)\n",
" 001: Alabama/AL\n",
" 002: Alaska/AK\n",
" ...\n",
" count 15327.000000\n",
" mean 2.357539\n",
" std 17.856144\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 11.000000\n",
" max 555.000000\n",
" Name: POWSP12, dtype: float64\n",
"PRIVCOV: Private health insurance coverage recode\n",
" 1: With private health insurance coverage\n",
" 2: Without private health insurance coverage\n",
" count 30559.000000\n",
" mean 1.283877\n",
" std 0.450885\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: PRIVCOV, dtype: float64\n",
"PUBCOV: Public health coverage recode\n",
" 1: With public health coverage\n",
" 2: Without public health coverage\n",
" count 30559.000000\n",
" mean 1.651461\n",
" std 0.476515\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: PUBCOV, dtype: float64\n",
"QTRBIR: Quarter of birth\n",
" 1: January through March\n",
" 2: April through June\n",
" 3: July through September\n",
" ...\n",
" count 30559.000000\n",
" mean 2.520665\n",
" std 1.111741\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 3.000000\n",
" 75% 4.000000\n",
" max 4.000000\n",
" Name: QTRBIR, dtype: float64\n",
"RAC1P: Recoded detailed race code\n",
" 1: White alone\n",
" 2: Black or African American alone\n",
" 3: American Indian alone\n",
" ...\n",
" count 30559.000000\n",
" mean 2.025132\n",
" std 1.739391\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 9.000000\n",
" Name: RAC1P, dtype: float64\n",
"RAC2P05: Recoded detailed race code for data collected prior to 2012\n",
" 01: White alone\n",
" 02: Black or African American alone\n",
" 03: Apache alone\n",
" ...\n",
" count 30559.000000\n",
" mean -0.339834\n",
" std 13.838677\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 67.000000\n",
" Name: RAC2P05, dtype: float64\n",
"RAC2P12: Recoded detailed race code for data collected in 2012 or later\n",
" 01: White alone\n",
" 02: Black or African American alone\n",
" 03: Apache alone\n",
" ...\n",
" count 30559.000000\n",
" mean -2.417291\n",
" std 13.208816\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 1.000000\n",
" max 68.000000\n",
" Name: RAC2P12, dtype: float64\n",
"RAC3P05: Recoded detailed race code for data collected prior to 2012\n",
" 01: Some other race alone\n",
" 02: Other Pacific Islander alone\n",
" 03: Samoan alone\n",
" ...\n",
" count 30559.000000\n",
" mean 27.004778\n",
" std 33.030433\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 44.000000\n",
" 75% 68.000000\n",
" max 72.000000\n",
" Name: RAC3P05, dtype: float64\n",
"RAC3P12: Recoded detailed race code for data collected in 2012 or later\n",
" 001: White alone\n",
" 002: Black or African American alone\n",
" 003: American Indian and Alaska Native alone\n",
" ...\n",
" count 30559.000000\n",
" mean -4.015347\n",
" std 7.034421\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 1.000000\n",
" max 95.000000\n",
" Name: RAC3P12, dtype: float64\n",
"RACAIAN: American Indian and Alaska Native recode (American Indian and Alaska Native alone or in combination with one or more other races)\n",
" 0: No\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.009490\n",
" std 0.096954\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: RACAIAN, dtype: float64\n",
"RACASN: Asian recode (Asian alone or in combination with one or more other races)\n",
" 0: No\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.044308\n",
" std 0.205781\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: RACASN, dtype: float64\n",
"RACBLK: Black or African American recode (Black alone or in combination with one or more other races)\n",
" 0: No\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.497464\n",
" std 0.500002\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 1.000000\n",
" max 1.000000\n",
" Name: RACBLK, dtype: float64\n",
"RACNHPI: Native Hawaiian and Other Pacific Islander recode (Native Hawaiian and Other Pacific Islander alone or in combination with one or more other races)\n",
" 0: No\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.001080\n",
" std 0.032844\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: RACNHPI, dtype: float64\n",
"RACNUM: Number of major race groups represented\n",
" 1..6: Race groups\n",
" count 30559.000000\n",
" mean 1.027291\n",
" std 0.185123\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 4.000000\n",
" Name: RACNUM, dtype: float64\n",
"RACSOR: Some other race recode (Some other race alone or in combination with one or more other races)\n",
" 0: No\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.027750\n",
" std 0.164257\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: RACSOR, dtype: float64\n",
"RACWHT: White recode (White alone or in combination with one or more other races)\n",
" 0: No\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.447200\n",
" std 0.497213\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 1.000000\n",
" max 1.000000\n",
" Name: RACWHT, dtype: float64\n",
"RC: Related child\n",
" 0: No (includes GQ)\n",
" 1: Yes\n",
" count 30559.000000\n",
" mean 0.149939\n",
" std 0.357018\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: RC, dtype: float64\n",
"SCIENGP: Field of degree science and engineering flag - NSF definition\n",
" b: N/A (less than a bachelor's degree)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 12871.000000\n",
" mean 1.479605\n",
" std 0.499603\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: SCIENGP, dtype: float64\n",
"SCIENGRLP: Field of degree science and engineering related flag - NSF definition\n",
" b: N/A (less than a bachelor's degree)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 12871.000000\n",
" mean 1.951674\n",
" std 0.214462\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: SCIENGRLP, dtype: float64\n",
"SFN: Subfamily number\n",
" b: N/A (GQ/not in a subfamily)\n",
" 1: In subfamily 1\n",
" 2: In subfamily 2\n",
" ...\n",
" count 923\n",
" mean 1\n",
" std 0\n",
" min 1\n",
" 25% 1\n",
" 50% 1\n",
" 75% 1\n",
" max 1\n",
" Name: SFN, dtype: float64\n",
"SFR: Subfamily relationship\n",
" b: N/A (GQ/not in a subfamily)\n",
" 1: Husband/wife no children\n",
" 2: Husband/wife with children\n",
" ...\n",
" count 923.000000\n",
" mean 3.963164\n",
" std 1.375627\n",
" min 1.000000\n",
" 25% 3.000000\n",
" 50% 5.000000\n",
" 75% 5.000000\n",
" max 6.000000\n",
" Name: SFR, dtype: float64\n",
"SOCP00: SOC Occupation code for data collected in 2009 based on 2000 SOC codes\n",
" bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 111021: MGR-GENERAL AND OPERATIONS MANAGERS\n",
" 1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS *\n",
" ...\n",
" count 20557\n",
" unique 314\n",
" top N.A.//\n",
" freq 16866\n",
" Name: SOCP00, dtype: object\n",
"SOCP10: SOC Occupation code for data collected in 2010 and 2011 based on 2010 SOC codes\n",
" bbbbbb: Not in universe (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS*\n",
" 111021: MGR-GENERAL AND OPERATIONS MANAGERS\n",
" ...\n",
" count 20557\n",
" unique 370\n",
" top N.A.//\n",
" freq 12485\n",
" Name: SOCP10, dtype: object\n",
"SOCP12: SOC Occupation recode for data collected in 2012 or later based on 2010 SOC codes\n",
" ['NOTE: For additional information on NAICS and SOC groupings within major categories visit our website at: http://www.census.gov/people/io/methodology.']\n",
" bbbbbb: N/A (less than 16 years old/NILF who last worked more than 5 years ago or never worked)\n",
" 1110XX: MGR-CHIEF EXECUTIVES AND LEGISLATORS *\n",
" 111021: MGR-GENERAL AND OPERATIONS MANAGERS\n",
" ...\n",
" count 20557\n",
" unique 357\n",
" top N.A.//\n",
" freq 11763\n",
" Name: SOCP12, dtype: object\n",
"VPS: Veteran period of service\n",
" bb: N/A (less than 17 years old, no active duty) War Times:\n",
" 01: Gulf War: 9/2001 or later\n",
" 02: Gulf War: 9/2001 or later and Gulf War: 8/1990 - 8/2001\n",
" ...\n",
" count 1855.000000\n",
" mean 6.997844\n",
" std 4.099096\n",
" min 1.000000\n",
" 25% 4.000000\n",
" 50% 6.000000\n",
" 75% 11.000000\n",
" max 15.000000\n",
" Name: VPS, dtype: float64\n",
"WAOB: World area of birth ****\n",
" 1: US state (POB = 001-059)\n",
" 2: PR and US Island Areas (POB = 060-099)\n",
" 3: Latin America (POB = 303,310-399)\n",
" ...\n",
" count 30559.000000\n",
" mean 1.457999\n",
" std 1.235133\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 8.000000\n",
" Name: WAOB, dtype: float64\n",
"num columns described = 145\n",
"\n",
"HOUSING RECORD\n",
"insp: Column not in data dictionary.\n",
" count 6561.000000\n",
" mean 999.282731\n",
" std 1085.174484\n",
" min 0.000000\n",
" 25% 370.000000\n",
" 50% 790.000000\n",
" 75% 1200.000000\n",
" max 8600.000000\n",
" Name: insp, dtype: float64\n",
"RT: Record Type\n",
" H: Housing Record or Group Quarters Unit\n",
" count 17500\n",
" unique 1\n",
" top H\n",
" freq 17500\n",
" Name: RT, dtype: object\n",
"SERIALNO: Housing unit/GQ person serial number\n",
" 2009000000001..2013999999999: Unique identifier\n",
" count 1.750000e+04\n",
" mean 2.011068e+12\n",
" std 1.401911e+09\n",
" min 2.009000e+12\n",
" 25% 2.010000e+12\n",
" 50% 2.011001e+12\n",
" 75% 2.012001e+12\n",
" max 2.013001e+12\n",
" Name: SERIALNO, dtype: float64\n",
"DIVISION: Division code\n",
" 0: Puerto Rico\n",
" 1: New England (Northeast region)\n",
" 2: Middle Atlantic (Northeast region)\n",
" ...\n",
" count 17500\n",
" mean 5\n",
" std 0\n",
" min 5\n",
" 25% 5\n",
" 50% 5\n",
" 75% 5\n",
" max 5\n",
" Name: DIVISION, dtype: float64\n",
"PUMA00: Public use microdata area code (PUMA) based on Census 2000 definition for data collected prior to 2012. Use in combination with PUMA10.\n",
" ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA00 applies to data collected in calendar years 2011 and earlier. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']\n",
" 00100..08200: Public use microdata area codes\n",
" 77777: Combination of 01801, 01802, and 01905 in Louisiana\n",
" -0009: Code classification is Not Applicable because data collected in 2012 or later\n",
" ...\n",
" count 17500.000000\n",
" mean 56.427371\n",
" std 55.291036\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% 101.000000\n",
" 75% 103.000000\n",
" max 105.000000\n",
" Name: PUMA00, dtype: float64\n",
"PUMA10: Public use microdata area code (PUMA) based on 2010 Census definition for data collected in 2012 or later. Use in combination with PUMA00.\n",
" ['Note: Public Use Microdata Areas (PUMAs) designate areas of 100,000 or more population. Use with ST for unique code. PUMA10 applies to data collected in calendar year 2012 and later. Users of multi-year datasets that contain data from before and after 2012 should use PUMA00 and PUMA10 together where possible, but not all PUMAs will be comparable. See http://tigerweb.geo.census.gov to identify PUMA changes between the two vintages.']\n",
" 00100..70301: Public use microdata area codes\n",
" -0009: Code classification is Not Applicable because data collected prior to 2012\n",
" count 17500.000000\n",
" mean 37.764171\n",
" std 55.358495\n",
" min -9.000000\n",
" 25% -9.000000\n",
" 50% -9.000000\n",
" 75% 103.000000\n",
" max 105.000000\n",
" Name: PUMA10, dtype: float64\n",
"REGION: Region code\n",
" 1: Northeast\n",
" 2: Midwest\n",
" 3: South\n",
" ...\n",
" count 17500\n",
" mean 3\n",
" std 0\n",
" min 3\n",
" 25% 3\n",
" 50% 3\n",
" 75% 3\n",
" max 3\n",
" Name: REGION, dtype: float64\n",
"ST: State Code\n",
" 01: Alabama/AL\n",
" 02: Alaska/AK\n",
" 04: Arizona/AZ\n",
" ...\n",
" count 17500\n",
" mean 11\n",
" std 0\n",
" min 11\n",
" 25% 11\n",
" 50% 11\n",
" 75% 11\n",
" max 11\n",
" Name: ST, dtype: float64\n",
"ADJHSG: Adjustment factor for housing dollar amounts (6 implied decimal places)\n",
" ['Note: The values of ADJHSG inflation-adjusts reported housing costs to 2013 dollars and applies to variables CONP, ELEP, FULP, GASP, GRNTP, INSP, MHP, MRGP, SMOCP, RNTP, SMP, and WATP in the housing record. ADJHSG does not apply to AGS or TAXP because they are categorical variables that should not be inflation-adjusted.']\n",
" 1086032: 2009 factor\n",
" 1068395: 2010 factor\n",
" 1035725: 2011 factor\n",
" ...\n",
" count 17500.000000\n",
" mean 1039364.231657\n",
" std 31877.254257\n",
" min 1000000.000000\n",
" 25% 1014531.000000\n",
" 50% 1035725.000000\n",
" 75% 1068395.000000\n",
" max 1086032.000000\n",
" Name: ADJHSG, dtype: float64\n",
"ADJINC: Adjustment factor for income and earnings dollar amounts (6 implied decimal places)\n",
" ['Note: The values of ADJINC inflation-adjusts reported income to 2013 dollars. ADJINC incorporates an adjustment that annualizes the different rolling reference periods for reported income (as done in the single-year data using the variable ADJINC from the 1-year file) and an adjustment to inflation-adjust the annualized income to 2013 dollars. ADJINC applies to variables FINCP and HINCP in the housing record, and variables INTP, OIP, PAP, PERNP, PINCP, RETP, SEMP, SSIP, SSP, and WAGP in the person record.']\n",
" 1085467: 2009 factor (0.999480 * 1.08603175)\n",
" 1076540: 2010 factor (1.007624 * 1.06839475)\n",
" 1054614: 2011 factor (1.018237 * 1.03572510)\n",
" ...\n",
" count 17500.000000\n",
" mean 1048478.770229\n",
" std 29598.269890\n",
" min 1007549.000000\n",
" 25% 1024887.000000\n",
" 50% 1054614.000000\n",
" 75% 1076540.000000\n",
" max 1085467.000000\n",
" Name: ADJINC, dtype: float64\n",
"WGTP: Housing Weight\n",
" 0000: Group Quarter placeholder record\n",
" 00001..09999: Integer weight of housing unit\n",
" count 17500.000000\n",
" mean 17.047257\n",
" std 13.878535\n",
" min 0.000000\n",
" 25% 10.000000\n",
" 50% 14.000000\n",
" 75% 22.000000\n",
" max 172.000000\n",
" Name: WGTP, dtype: float64\n",
"NP: Number of person records following this housing record\n",
" 00: Vacant unit\n",
" 01: One person record (one person in household or any person in group quarters)\n",
" 02..20: Number of person records (number of persons in household)\n",
" ...\n",
" count 17500.000000\n",
" mean 1.746229\n",
" std 1.291371\n",
" min 0.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 13.000000\n",
" Name: NP, dtype: float64\n",
"TYPE: Type of unit\n",
" 1: Housing unit\n",
" 2: Institutional group quarters\n",
" 3: Noninstitutional group quarters\n",
" ...\n",
" count 17500.000000\n",
" mean 1.268514\n",
" std 0.655686\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 3.000000\n",
" Name: TYPE, dtype: float64\n",
"ACR: Lot size\n",
" b: N/A (GQ/not a one-family house or mobile home)\n",
" 1: House on less than one acre\n",
" 2: House on one to less than ten acres\n",
" ...\n",
" count 6388.000000\n",
" mean 1.028961\n",
" std 0.182034\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 3.000000\n",
" Name: ACR, dtype: float64\n",
"AGS: Sales of Agriculture Products (Yearly sales)\n",
" ['Note: No adjustment factor is applied to AGS.']\n",
" b: N/A (GQ/vacant/not a one-family house or mobile home/less than 1 acre)\n",
" 1: None\n",
" 2: $ 1 - $ 999\n",
" ...\n",
" count 164.000000\n",
" mean 1.201220\n",
" std 0.934544\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 6.000000\n",
" Name: AGS, dtype: float64\n",
"BATH: Bathtub or shower\n",
" b: N/A (GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.005322\n",
" std 0.072760\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: BATH, dtype: float64\n",
"BDSP: Number of bedrooms\n",
" bb : N/A (GQ)\n",
" 00..99: 0 to 99 bedrooms (Top-coded)\n",
" count 14844.000000\n",
" mean 2.171989\n",
" std 1.452519\n",
" min 0.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 3.000000\n",
" max 14.000000\n",
" Name: BDSP, dtype: float64\n",
"BLD: Units in structure\n",
" bb: N/A (GQ)\n",
" 01: Mobile home or trailer\n",
" 02: One-family house detached\n",
" ...\n",
" count 14844.000000\n",
" mean 5.365131\n",
" std 2.671220\n",
" min 2.000000\n",
" 25% 3.000000\n",
" 50% 5.000000\n",
" 75% 8.000000\n",
" max 9.000000\n",
" Name: BLD, dtype: float64\n",
"BUS: Business or medical office on property\n",
" b: N/A (GQ/not a one-family house or mobile home)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 6388.000000\n",
" mean 1.986381\n",
" std 0.115913\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: BUS, dtype: float64\n",
"CONP: Condo fee (monthly amount)\n",
" ['Note: Use values from ADJHSG to adjust CONP to constant dollars.']\n",
" bbbb: N/A (GQ/vacant/not owned or being bought)\n",
" 0000: Not condo\n",
" 0001..9999: $1 - $9999 (Rounded and top-coded)\n",
" ...\n",
" count 14844.000000\n",
" mean 51.061035\n",
" std 175.491301\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1700.000000\n",
" Name: CONP, dtype: float64\n",
"ELEP: Electricity (monthly cost)\n",
" ['Note: Use values from ADJHSG to adjust ELEP values 3 and over to constant dollars.']\n",
" bbb: N/A (GQ/vacant)\n",
" 001: Included in rent or in condo fee\n",
" 002: No charge or electricity not used\n",
" ...\n",
" count 13737.000000\n",
" mean 92.849967\n",
" std 94.873654\n",
" min 1.000000\n",
" 25% 30.000000\n",
" 50% 70.000000\n",
" 75% 130.000000\n",
" max 570.000000\n",
" Name: ELEP, dtype: float64\n",
"FS: Yearly food stamp/Supplemental Nutrition Assistance Program recipiency\n",
" b: N/A (vacant)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 16393.000000\n",
" mean 1.874093\n",
" std 0.331755\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: FS, dtype: float64\n",
"FULP: Fuel cost(yearly cost for fuels other than gas and electricity)\n",
" ['Note: Use values from ADJHSG to adjust FULP values 3 and over to constant dollars.']\n",
" bbbb: N/A (GQ/vacant)\n",
" 0001: Included in rent or in condo fee\n",
" 0002: No charge or these fuels not used\n",
" ...\n",
" count 13737.000000\n",
" mean 45.400306\n",
" std 311.391053\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 5200.000000\n",
" Name: FULP, dtype: float64\n",
"GASP: Gas (monthly cost)\n",
" ['Note: Use values from ADJHSG to adjust GASP values 4 and over to constant dollars.']\n",
" bbb: N/A (GQ/vacant)\n",
" 001: Included in rent or in condo fee\n",
" 002: Included in electricity payment\n",
" ...\n",
" count 13737.000000\n",
" mean 56.089758\n",
" std 88.354993\n",
" min 1.000000\n",
" 25% 3.000000\n",
" 50% 10.000000\n",
" 75% 80.000000\n",
" max 580.000000\n",
" Name: GASP, dtype: float64\n",
"HFL: House heating fuel\n",
" b: N/A (GQ/vacant)\n",
" 1: Utility gas\n",
" 2: Bottled, tank, or LP gas\n",
" ...\n",
" count 13737.000000\n",
" mean 1.878867\n",
" std 1.306775\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 3.000000\n",
" max 9.000000\n",
" Name: HFL, dtype: float64\n",
"MHP: Mobile home costs (yearly amount)\n",
" ['Note: Use values from ADJHSG to adjust MHP to constant dollars.']\n",
" bbbbb: N/A (GQ/vacant/not owned or being bought/ not mobile home)\n",
" 00000: None\n",
" 00001..99999: $1 to $99999 (Rounded and top-coded)\n",
" ...\n",
" count 0\n",
" mean NaN\n",
" std NaN\n",
" min NaN\n",
" 25% NaN\n",
" 50% NaN\n",
" 75% NaN\n",
" max NaN\n",
" Name: MHP, dtype: float64\n",
"MRGI: First mortgage payment includes fire/hazard/flood insurance\n",
" b: N/A (GQ/vacant/not owned or being bought/not mortgaged)\n",
" 1: Yes, insurance included in payment\n",
" 2: No, insurance paid separately or no insurance\n",
" ...\n",
" count 4971.000000\n",
" mean 1.446590\n",
" std 0.497189\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: MRGI, dtype: float64\n",
"MRGP: First mortgage payment (monthly amount)\n",
" ['Note: Use values from ADJHSG to adjust MRGP to constant dollars.']\n",
" bbbbb: N/A (GQ/vacant/not owned or being bought/not mortgaged)\n",
" 00001..99999: $1 to $99999 (Rounded and top-coded)\n",
" count 4971.000000\n",
" mean 1936.767250\n",
" std 1244.418262\n",
" min 10.000000\n",
" 25% 1000.000000\n",
" 50% 1700.000000\n",
" 75% 2500.000000\n",
" max 8000.000000\n",
" Name: MRGP, dtype: float64\n",
"MRGT: First mortgage payment includes real estate taxes\n",
" b: N/A (GQ/vacant/not owned or being bought/not mortgaged)\n",
" 1: Yes, taxes included in payment\n",
" 2: No, taxes paid separately or taxes not required\n",
" ...\n",
" count 4971.000000\n",
" mean 1.281633\n",
" std 0.449841\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: MRGT, dtype: float64\n",
"MRGX: First mortgage status\n",
" b: N/A (GQ/vacant/not owned or being bought)\n",
" 1: Mortgage, deed of trust, or similar debt\n",
" 2: Contract to purchase\n",
" ...\n",
" count 6561.000000\n",
" mean 1.490626\n",
" std 0.857150\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 3.000000\n",
" Name: MRGX, dtype: float64\n",
"REFR: Refrigerator\n",
" b: N/A (GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.007949\n",
" std 0.088807\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: REFR, dtype: float64\n",
"RMSP: Number of Rooms\n",
" bb: N/A (GQ)\n",
" 00..99: Rooms (Top-coded)\n",
" count 14844.000000\n",
" mean 5.139854\n",
" std 2.895583\n",
" min 1.000000\n",
" 25% 3.000000\n",
" 50% 4.000000\n",
" 75% 7.000000\n",
" max 24.000000\n",
" Name: RMSP, dtype: float64\n",
"RNTM: Meals included in rent\n",
" b: N/A (GQ/not a rental unit/occupied without rent payment)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 7373.000000\n",
" mean 1.987658\n",
" std 0.110416\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 2.000000\n",
" Name: RNTM, dtype: float64\n",
"RNTP: Monthly rent\n",
" ['Note: Use values from ADJHSG to adjust RNTP to constant dollars.']\n",
" bbbbb: N/A (GQ/not a rental unit/occupied without rent payment)\n",
" 00001..99999: $1 to $99999 (Rounded and top-coded)\n",
" count 7373.000000\n",
" mean 1246.782856\n",
" std 769.088231\n",
" min 4.000000\n",
" 25% 730.000000\n",
" 50% 1100.000000\n",
" 75% 1700.000000\n",
" max 3900.000000\n",
" Name: RNTP, dtype: float64\n",
"RWAT: Hot and cold running water\n",
" b: N/A (GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.007343\n",
" std 0.085379\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: RWAT, dtype: float64\n",
"RWATPR: Running water\n",
" b: N/A (GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 14844\n",
" mean 9\n",
" std 0\n",
" min 9\n",
" 25% 9\n",
" 50% 9\n",
" 75% 9\n",
" max 9\n",
" Name: RWATPR, dtype: float64\n",
"SINK: Sink with a faucet\n",
" b: N/A (GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.005457\n",
" std 0.073670\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: SINK, dtype: float64\n",
"SMP: Total payment on all second and junior mortgages and home equity loans (monthly amount)\n",
" ['Note: Use ADJHSG to adjust SMP to constant dollars.']\n",
" bbbbb: N/A (GQ/vacant/not owned or being bought/ no second or junior mortgages or home equity loans)\n",
" 00001..99999: $1 to $99999 (Rounded and top-coded)\n",
" count 1228.000000\n",
" mean 506.485342\n",
" std 569.513130\n",
" min 4.000000\n",
" 25% 170.000000\n",
" 50% 350.000000\n",
" 75% 600.000000\n",
" max 4100.000000\n",
" Name: SMP, dtype: float64\n",
"STOV: Stove or range\n",
" b: N/A (GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.010105\n",
" std 0.100018\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: STOV, dtype: float64\n",
"TEL: Telephone\n",
" ['NOTE: Problems in the collection of data on the availability of telephone service (TEL) in 2012 led to suppressing this variable in six PUMAs in Georgia. This only affects 2012 vintage data. See the Estimation section of the Accuracy of the Data for the 2009-2013 5-year PUMS for more information on PUMS estimates using TEL. http://www.census.gov/acs/www/data_documentation/pums_documentation/']\n",
" b: N/A (GQ/vacant)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 13737.000000\n",
" mean 1.029701\n",
" std 0.169767\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: TEL, dtype: float64\n",
"TEN: Tenure\n",
" b: N/A (GQ/vacant)\n",
" 1: Owned with mortgage or loan (include home equity loans)\n",
" 2: Owned free and clear\n",
" ...\n",
" count 13737.000000\n",
" mean 2.174128\n",
" std 0.945958\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 3.000000\n",
" 75% 3.000000\n",
" max 4.000000\n",
" Name: TEN, dtype: float64\n",
"TOIL: Flush toilet\n",
" b: N/A (GQ)\n",
" 1: Yes\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.005591\n",
" std 0.074569\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: TOIL, dtype: float64\n",
"VACS: Vacancy status\n",
" b: N/A (GQ/occupied)\n",
" 1: For rent\n",
" 2: Rented, not occupied\n",
" ...\n",
" count 1107.000000\n",
" mean 4.214995\n",
" std 2.565742\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 4.000000\n",
" 75% 7.000000\n",
" max 7.000000\n",
" Name: VACS, dtype: float64\n",
"VALP: Property value\n",
" bbbbbbb: N/A (GQ/vacant units, except �for-sale-only� and �sold, not occupied�/not owned or being bought)\n",
" 0000000: $0 (applies to 2009 and 2010 only)\n",
" 0000001..9999999: $1 to $9999999 (Rounded and top-coded)\n",
" ...\n",
" count 6741.000000\n",
" mean 576821.155615\n",
" std 578742.887940\n",
" min 180.000000\n",
" 25% 290000.000000\n",
" 50% 410000.000000\n",
" 75% 700000.000000\n",
" max 5303000.000000\n",
" Name: VALP, dtype: float64\n",
"VEH: Vehicles (1 ton or less) available\n",
" b: N/A (GQ/vacant)\n",
" 0: No vehicles\n",
" 1: 1 vehicle\n",
" ...\n",
" count 13737.000000\n",
" mean 0.925311\n",
" std 0.869523\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 6.000000\n",
" Name: VEH, dtype: float64\n",
"WATP: Water (yearly cost)\n",
" ['Note: Use values from ADJHSG to adjust WATP values 3 and over to constant dollars.']\n",
" bbbb: N/A (GQ/vacant)\n",
" 0001: Included in rent or in condo fee\n",
" 0002: No charge\n",
" ...\n",
" count 13737.000000\n",
" mean 239.380724\n",
" std 412.715268\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 390.000000\n",
" max 3900.000000\n",
" Name: WATP, dtype: float64\n",
"YBL: When structure first built\n",
" bb: N/A (GQ)\n",
" 01: 1939 or earlier\n",
" 02: 1940 to 1949\n",
" ...\n",
" count 14844.000000\n",
" mean 3.239289\n",
" std 2.768117\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 4.000000\n",
" max 16.000000\n",
" Name: YBL, dtype: float64\n",
"FES: Family type and employment status\n",
" b: N/A (GQ/vacant/not a family)\n",
" 1: Married-couple family: Husband and wife in LF\n",
" 2: Married-couple family: Husband in labor force, wife not in LF\n",
" ...\n",
" count 5929.000000\n",
" mean 3.998988\n",
" std 2.787092\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 4.000000\n",
" 75% 7.000000\n",
" max 8.000000\n",
" Name: FES, dtype: float64\n",
"FINCP: Family income (past 12 months)\n",
" ['Note: Use values from ADJINC to adjust FINCP to constant dollars.']\n",
" bbbbbbbb: N/A (GQ/vacant)\n",
" 00000000: No family income\n",
" -0059999: Loss of -$59,999 or more\n",
" ...\n",
" count 5953.000000\n",
" mean 130596.028725\n",
" std 154445.776061\n",
" min 0.000000\n",
" 25% 36000.000000\n",
" 50% 84500.000000\n",
" 75% 166000.000000\n",
" max 2087000.000000\n",
" Name: FINCP, dtype: float64\n",
"FPARC: Family presence and age of related children\n",
" b: N/A (GQ/vacant/not a family)\n",
" 1: With related children under 5 years only\n",
" 2: With related children 5 to 17 years only\n",
" ...\n",
" count 5953.000000\n",
" mean 3.076432\n",
" std 1.124643\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 4.000000\n",
" 75% 4.000000\n",
" max 4.000000\n",
" Name: FPARC, dtype: float64\n",
"GRNTP: Gross rent (monthly amount)\n",
" ['Note: Use values from ADJHSG to adjust GRNTP to constant dollars.']\n",
" bbbbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment)\n",
" 00001..99999: $1 - $99999 (Components are rounded)\n",
" count 6989.000000\n",
" mean 1349.524109\n",
" std 791.201141\n",
" min 4.000000\n",
" 25% 819.000000\n",
" 50% 1200.000000\n",
" 75% 1800.000000\n",
" max 4510.000000\n",
" Name: GRNTP, dtype: float64\n",
"GRPIP: Gross rent as a percentage of household income past 12 months\n",
" bbb: N/A (GQ/vacant/not a rental unit/occupied without rent payment/no household income)\n",
" 001..100: 1% to 100%\n",
" 101: 101% or more\n",
" ...\n",
" count 6805.000000\n",
" mean 39.897575\n",
" std 28.575415\n",
" min 1.000000\n",
" 25% 20.000000\n",
" 50% 29.000000\n",
" 75% 51.000000\n",
" max 101.000000\n",
" Name: GRPIP, dtype: float64\n",
"HHL: Household language\n",
" b: N/A (GQ/vacant)\n",
" 1: English only\n",
" 2: Spanish\n",
" ...\n",
" count 13737.000000\n",
" mean 1.336245\n",
" std 0.850583\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 5.000000\n",
" Name: HHL, dtype: float64\n",
"HHT: Household/family type\n",
" b: N/A (GQ/vacant)\n",
" 1: Married couple household\n",
" 2: Other family household:Male householder, no wife present\n",
" ...\n",
" count 13737.000000\n",
" mean 3.805198\n",
" std 2.036160\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 4.000000\n",
" 75% 6.000000\n",
" max 7.000000\n",
" Name: HHT, dtype: float64\n",
"HINCP: Household income (past 12 months)\n",
" ['Note: Use values from ADJINC to adjust HINCP to constant dollars.']\n",
" bbbbbbbb: N/A(GQ/vacant)\n",
" 00000000: No household income\n",
" -0059999: Loss of -$59,999 or more\n",
" ...\n",
" count 13737.000000\n",
" mean 102051.604353\n",
" std 125888.164393\n",
" min -13600.000000\n",
" 25% 29200.000000\n",
" 50% 67000.000000\n",
" 75% 128000.000000\n",
" max 2087000.000000\n",
" Name: HINCP, dtype: float64\n",
"HUGCL: Household with grandparent living with grandchildren\n",
" b: N/A (GQ/vacant)\n",
" 0: Household without grandparent living with grandchildren\n",
" 1: Household with grandparent living with grandchildren\n",
" ...\n",
" count 13737.000000\n",
" mean 0.033559\n",
" std 0.180098\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: HUGCL, dtype: float64\n",
"HUPAC: HH presence and age of children\n",
" b: N/A (GQ/vacant)\n",
" 1: With children under 6 years only\n",
" 2: With children 6 to 17 years only\n",
" ...\n",
" count 13737.000000\n",
" mean 3.591541\n",
" std 0.888889\n",
" min 1.000000\n",
" 25% 4.000000\n",
" 50% 4.000000\n",
" 75% 4.000000\n",
" max 4.000000\n",
" Name: HUPAC, dtype: float64\n",
"HUPAOC: HH presence and age of own children\n",
" b: N/A (GQ/vacant)\n",
" 1: Presence of own children under 6 years only\n",
" 2: Presence of own children 6 to 17 years only\n",
" ...\n",
" count 13737.000000\n",
" mean 3.652617\n",
" std 0.836272\n",
" min 1.000000\n",
" 25% 4.000000\n",
" 50% 4.000000\n",
" 75% 4.000000\n",
" max 4.000000\n",
" Name: HUPAOC, dtype: float64\n",
"HUPARC: HH presence and age of related children\n",
" b: N/A (GQ/vacant)\n",
" 1: Presence of related children under 6 years only\n",
" 2: Presence of related children 6 to 17 years only\n",
" ...\n",
" count 13737.000000\n",
" mean 3.594599\n",
" std 0.886682\n",
" min 1.000000\n",
" 25% 4.000000\n",
" 50% 4.000000\n",
" 75% 4.000000\n",
" max 4.000000\n",
" Name: HUPARC, dtype: float64\n",
"KIT: Complete kitchen facilities\n",
" b: N/A (GQ)\n",
" 1: Yes, has stove or range, refrigerator, and sink with a faucet\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.011587\n",
" std 0.107022\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: KIT, dtype: float64\n",
"LNGI: Limited English speaking households\n",
" b: N/A (GQ/vacant)\n",
" 1: At least one person in the household 14 and over speaks English only or speaks English 'very well'\n",
" 2: No one in the household 14 and over speaks English only or speaks English 'very well'\n",
" ...\n",
" count 13737.000000\n",
" mean 1.023368\n",
" std 0.151073\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: LNGI, dtype: float64\n",
"MULTG: Multigenerational Household\n",
" b: N/A (GQ/Vacant/NP=0)\n",
" 1: No, not a multigenerational household\n",
" 2: Yes, is a multigenerational household\n",
" ...\n",
" count 13737.000000\n",
" mean 1.032977\n",
" std 0.178582\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: MULTG, dtype: float64\n",
"MV: When moved into this house or apartment\n",
" b: N/A (GQ/vacant)\n",
" 1: 12 months or less\n",
" 2: 13 to 23 months\n",
" ...\n",
" count 13737.000000\n",
" mean 3.800466\n",
" std 1.936343\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 4.000000\n",
" 75% 5.000000\n",
" max 7.000000\n",
" Name: MV, dtype: float64\n",
"NOC: Number of own children in household (unweighted)\n",
" bb: N/A(GQ/vacant)\n",
" 00: No own children\n",
" 01..19: Number of own children in household\n",
" ...\n",
" count 13737.000000\n",
" mean 0.277572\n",
" std 0.727938\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 8.000000\n",
" Name: NOC, dtype: float64\n",
"NPF: Number of persons in family (unweighted)\n",
" bb: N/A (GQ/vacant/non-family household)\n",
" 02..20: Number of persons in family\n",
" count 5953.000000\n",
" mean 2.893331\n",
" std 1.224969\n",
" min 2.000000\n",
" 25% 2.000000\n",
" 50% 2.000000\n",
" 75% 3.000000\n",
" max 12.000000\n",
" Name: NPF, dtype: float64\n",
"NPP: Grandparent headed household with no parent present\n",
" b: N/A (GQ/vacant)\n",
" 0: Not a grandparent headed household with no parent present\n",
" 1: Grandparent headed household with no parent present\n",
" ...\n",
" count 13737.000000\n",
" mean 0.004586\n",
" std 0.067568\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: NPP, dtype: float64\n",
"NR: Presence of nonrelative in household\n",
" b: N/A (GQ/vacant)\n",
" 0: None\n",
" 1: 1 or more nonrelatives\n",
" ...\n",
" count 13737.000000\n",
" mean 0.159132\n",
" std 0.365813\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: NR, dtype: float64\n",
"NRC: Number of related children in household (unweighted)\n",
" bb: N/A (GQ/vacant)\n",
" 00: No related children\n",
" 01..19: Number of related children in household\n",
" ...\n",
" count 13737.000000\n",
" mean 0.333552\n",
" std 0.806915\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 10.000000\n",
" Name: NRC, dtype: float64\n",
"OCPIP: Selected monthly owner costs as a percentage of household income during the past 12 months\n",
" bbb: N/A (GQ/vacant/not owned or being bought/ no household income)\n",
" 001..100: 1% to 100%\n",
" 101: 101% or more\n",
" ...\n",
" count 6500.000000\n",
" mean 26.966923\n",
" std 23.259257\n",
" min 1.000000\n",
" 25% 12.000000\n",
" 50% 20.000000\n",
" 75% 32.000000\n",
" max 101.000000\n",
" Name: OCPIP, dtype: float64\n",
"PARTNER: Unmarried partner household\n",
" b: N/A (GQ/vacant)\n",
" 0: No unmarried partner in household\n",
" 1: Male householder, male partner\n",
" ...\n",
" count 13737.000000\n",
" mean 0.188032\n",
" std 0.751881\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 4.000000\n",
" Name: PARTNER, dtype: float64\n",
"PLM: Complete plumbing facilities\n",
" b: N/A (GQ)\n",
" 1: Yes, has hot and cold running water, a flush toilet, and a bathtub or shower\n",
" 2: No\n",
" ...\n",
" count 14844.000000\n",
" mean 1.008690\n",
" std 0.092819\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: PLM, dtype: float64\n",
"PSF: Presence of subfamilies in Household\n",
" b: N/A (GQ/vacant)\n",
" 0: No subfamilies\n",
" 1: 1 or more subfamilies\n",
" ...\n",
" count 13737.000000\n",
" mean 0.026789\n",
" std 0.161472\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: PSF, dtype: float64\n",
"R18: Presence of persons under 18 years in household (unweighted)\n",
" b: N/A (GQ/vacant)\n",
" 0: No person under 18 in household\n",
" 1: 1 or more persons under 18 in household\n",
" ...\n",
" count 13737.000000\n",
" mean 0.193638\n",
" std 0.395163\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 1.000000\n",
" Name: R18, dtype: float64\n",
"R60: Presence of persons 60 years and over in household (unweighted)\n",
" b: N/A (GQ/vacant)\n",
" 0: No person 60 and over\n",
" 1: 1 person 60 and over\n",
" ...\n",
" count 13737.000000\n",
" mean 0.415447\n",
" std 0.647227\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 1.000000\n",
" max 2.000000\n",
" Name: R60, dtype: float64\n",
"R65: Presence of persons 65 years and over in household (unweighted)\n",
" b: N/A (GQ/vacant)\n",
" 0: No person 65 and over\n",
" 1: 1 person 65 and over\n",
" ...\n",
" count 13737.000000\n",
" mean 0.297809\n",
" std 0.568071\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 0.000000\n",
" max 2.000000\n",
" Name: R65, dtype: float64\n",
"RESMODE: Response mode\n",
" b: N/A (GQ)\n",
" 1: Mail\n",
" 2: CATI/CAPI\n",
" ...\n",
" count 14844.000000\n",
" mean 1.523579\n",
" std 0.657194\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 1.000000\n",
" 75% 2.000000\n",
" max 3.000000\n",
" Name: RESMODE, dtype: float64\n",
"SMOCP: Selected monthly owner costs\n",
" ['Note: Use values from ADJHSG to adjust SMOCP to constant dollars.']\n",
" bbbbb: N/A (GQ/vacant/not owned or being bought)\n",
" 00000: None\n",
" 00001..99999: $1 - $99999 (Components are rounded)\n",
" ...\n",
" count 6561.000000\n",
" mean 2099.427526\n",
" std 1528.415666\n",
" min 0.000000\n",
" 25% 956.000000\n",
" 50% 1821.000000\n",
" 75% 2810.000000\n",
" max 12113.000000\n",
" Name: SMOCP, dtype: float64\n",
"SMX: Second or junior mortgage or home equity loan status\n",
" b: N/A (GQ/vacant/not owned or being bought)\n",
" 1: Yes, a second mortgage\n",
" 2: Yes, a home equity loan\n",
" ...\n",
" count 4971.000000\n",
" mean 2.709113\n",
" std 0.597341\n",
" min 1.000000\n",
" 25% 3.000000\n",
" 50% 3.000000\n",
" 75% 3.000000\n",
" max 4.000000\n",
" Name: SMX, dtype: float64\n",
"SRNT: Specified rent unit\n",
" b: N/A\n",
" 0: Not specified rent unit\n",
" 1: Specified rent unit\n",
" ...\n",
" count 14844.000000\n",
" mean 0.509229\n",
" std 0.499932\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 1.000000\n",
" 75% 1.000000\n",
" max 1.000000\n",
" Name: SRNT, dtype: float64\n",
"SVAL: Specified value owner unit\n",
" b: N/A\n",
" 0: Not specified value owner unit\n",
" 1: Specified value owner unit\n",
" ...\n",
" count 14844.000000\n",
" mean 0.328887\n",
" std 0.469825\n",
" min 0.000000\n",
" 25% 0.000000\n",
" 50% 0.000000\n",
" 75% 1.000000\n",
" max 1.000000\n",
" Name: SVAL, dtype: float64\n",
"TAXP: Property taxes (yearly amount)\n",
" ['Note: No adjustment factor is applied to TAXP.']\n",
" bb: N/A (GQ/vacant/not owned or being bought)\n",
" 01: None\n",
" 02: $ 1 - $ 49\n",
" ...\n",
" count 6561.000000\n",
" mean 36.513032\n",
" std 19.481382\n",
" min 1.000000\n",
" 25% 22.000000\n",
" 50% 33.000000\n",
" 75% 53.000000\n",
" max 68.000000\n",
" Name: TAXP, dtype: float64\n",
"WIF: Workers in family during the past 12 months\n",
" b: N/A (GQ/vacant/non-family household)\n",
" 0: No workers\n",
" 1: 1 worker\n",
" ...\n",
" count 5953.000000\n",
" mean 1.432555\n",
" std 0.824398\n",
" min 0.000000\n",
" 25% 1.000000\n",
" 50% 2.000000\n",
" 75% 2.000000\n",
" max 3.000000\n",
" Name: WIF, dtype: float64\n",
"WKEXREL: Work experience of householder and spouse\n",
" bb: N/A (GQ/vacant/not a family)\n",
" 01: Householder and spouse worked FT\n",
" 02: Householder worked FT; spouse worked < FT\n",
" ...\n",
" count 5953.000000\n",
" mean 7.787166\n",
" std 5.420605\n",
" min 1.000000\n",
" 25% 2.000000\n",
" 50% 8.000000\n",
" 75% 13.000000\n",
" max 15.000000\n",
" Name: WKEXREL, dtype: float64\n",
"WORKSTAT: Work status of householder or spouse in family households\n",
" bb: N/A (GQ/not a family household)\n",
" 01: Husband and wife both in labor force, both employed or in Armed Forces\n",
" 02: Husband and wife both in labor force, husband employed or in Armed Forces, wife unemployed\n",
" ...\n",
" count 5929.000000\n",
" mean 7.442570\n",
" std 5.594645\n",
" min 1.000000\n",
" 25% 1.000000\n",
" 50% 9.000000\n",
" 75% 13.000000\n",
" max 15.000000\n",
" Name: WORKSTAT, dtype: float64\n",
"num columns described = 84\n",
"\n"
]
}
],
"source": [
"print(\n",
"r\"\"\"`dfp`, `dfh`, `ddict`: Describe all columns ('variables') that aren't weights or flags.\n",
"Printed format:\n",
"[PERSON, HOUSING] RECORD\n",
"COL: Column name.\n",
" Column description.\n",
" Multi-line optional column notes.\n",
" 1-3 line description of value meanings ('variable codes').\n",
" Multi-line statistical description and data type.\n",
"...\n",
"num columns described = ncols\"\"\")\n",
"print()\n",
"# Maximum number of variable codes printed per column before truncating with '...'.\n",
"max_var_codes = 3\n",
"# Map each record type to its dataframe, weight column, and replicate weight columns.\n",
"records_dfs = collections.OrderedDict([\n",
"    ('PERSON RECORD', {'dataframe': dfp, 'weight': pwt, 'replicate_weights': pwts}),\n",
"    ('HOUSING RECORD', {'dataframe': dfh, 'weight': hwt, 'replicate_weights': hwts})])\n",
"for record_type in records_dfs:\n",
"    print(record_type)\n",
"    df = records_dfs[record_type]['dataframe']\n",
"    ncols_desc = 0 # number of columns described\n",
"    for col in df.columns:\n",
"        # Look up the column in the data dictionary.\n",
"        # Columns without an entry are still described statistically.\n",
"        if col in ddict['record_types'][record_type]:\n",
"            col_dict = ddict['record_types'][record_type][col]\n",
"            desc = col_dict['description']\n",
"        else:\n",
"            col_dict = None\n",
"            desc = 'Column not in data dictionary.'\n",
"        # Skip flag columns ('F*' described as '... flag' or '... edit')\n",
"        # and replicate weight columns ('*WGTP*' described as 'Weight replicate').\n",
"        is_flag = col.startswith('F') and desc.endswith((' flag', ' edit'))\n",
"        is_replicate_weight = ('WGTP' in col) and (\"Weight replicate\" in desc)\n",
"        if is_flag or is_replicate_weight:\n",
"            continue\n",
"        print(\"{col}: {desc}\".format(col=col, desc=desc))\n",
"        ncols_desc += 1\n",
"        if col_dict is not None:\n",
"            if 'notes' in col_dict:\n",
"                print(\" {notes}\".format(notes=col_dict['notes']))\n",
"            # Print at most `max_var_codes` variable codes. Print the ellipsis only\n",
"            # when codes were actually omitted, so that a column with exactly\n",
"            # `max_var_codes` codes is not shown as truncated.\n",
"            # A column without 'var_codes' prints no codes instead of raising KeyError.\n",
"            var_codes = list(col_dict.get('var_codes', {}).items())\n",
"            for (var_code, var_code_desc) in var_codes[:max_var_codes]:\n",
"                print(\" {vc}: {vcd}\".format(vc=var_code, vcd=var_code_desc))\n",
"            if len(var_codes) > max_var_codes:\n",
"                print(\" ...\")\n",
"        # Indent every line of the statistical description under the column name.\n",
"        print(' '+repr(df[col].describe()).replace('\\n', '\\n '))\n",
"    print(\"num columns described = {ncd}\".format(ncd=ncols_desc))\n",
"    print()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### PUMS estimates for user verification"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`dfe`: Estimates for user verification filtered for 'District of Columbia'.\n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" st | \n",
" state | \n",
" characteristic | \n",
" pums_est_09_to_13 | \n",
" pums_se_09_to_13 | \n",
" pums_moe_09_to_13 | \n",
"
\n",
" \n",
" \n",
" \n",
" 288 | \n",
" 11 | \n",
" District of Columbia | \n",
" Total population | \n",
" 619,371 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 289 | \n",
" 11 | \n",
" District of Columbia | \n",
" Housing unit population (RELP=0-15) | \n",
" 579,281 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 290 | \n",
" 11 | \n",
" District of Columbia | \n",
" GQ population (RELP=16-17) | \n",
" 40,090 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 291 | \n",
" 11 | \n",
" District of Columbia | \n",
" GQ institutional population (RELP=16) | \n",
" 7,443 | \n",
" 80 | \n",
" 132 | \n",
"
\n",
" \n",
" 292 | \n",
" 11 | \n",
" District of Columbia | \n",
" GQ noninstitutional population (RELP=17) | \n",
" 32,647 | \n",
" 80 | \n",
" 132 | \n",
"
\n",
" \n",
" 293 | \n",
" 11 | \n",
" District of Columbia | \n",
" Total males (SEX=1) | \n",
" 292,566 | \n",
" 361 | \n",
" 595 | \n",
"
\n",
" \n",
" 294 | \n",
" 11 | \n",
" District of Columbia | \n",
" Total females (SEX=2) | \n",
" 326,805 | \n",
" 361 | \n",
" 595 | \n",
"
\n",
" \n",
" 295 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 0-4 | \n",
" 36,530 | \n",
" 253 | \n",
" 417 | \n",
"
\n",
" \n",
" 296 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 5-9 | \n",
" 27,658 | \n",
" 636 | \n",
" 1046 | \n",
"
\n",
" \n",
" 297 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 10-14 | \n",
" 24,621 | \n",
" 598 | \n",
" 984 | \n",
"
\n",
" \n",
" 298 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 15-19 | \n",
" 40,950 | \n",
" 825 | \n",
" 1357 | \n",
"
\n",
" \n",
" 299 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 20-24 | \n",
" 58,828 | \n",
" 779 | \n",
" 1281 | \n",
"
\n",
" \n",
" 300 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 25-34 | \n",
" 134,025 | \n",
" 526 | \n",
" 865 | \n",
"
\n",
" \n",
" 301 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 35-44 | \n",
" 84,310 | \n",
" 534 | \n",
" 878 | \n",
"
\n",
" \n",
" 302 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 45-54 | \n",
" 75,981 | \n",
" 435 | \n",
" 716 | \n",
"
\n",
" \n",
" 303 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 55-59 | \n",
" 35,191 | \n",
" 599 | \n",
" 985 | \n",
"
\n",
" \n",
" 304 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 60-64 | \n",
" 31,070 | \n",
" 590 | \n",
" 970 | \n",
"
\n",
" \n",
" 305 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 65-74 | \n",
" 38,245 | \n",
" 295 | \n",
" 485 | \n",
"
\n",
" \n",
" 306 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 75-84 | \n",
" 22,283 | \n",
" 420 | \n",
" 690 | \n",
"
\n",
" \n",
" 307 | \n",
" 11 | \n",
" District of Columbia | \n",
" Age 85 and over | \n",
" 9,679 | \n",
" 377 | \n",
" 619 | \n",
"
\n",
" \n",
" 308 | \n",
" 11 | \n",
" District of Columbia | \n",
" Total housing units (TYPE=1) | \n",
" 298,327 | \n",
" 113 | \n",
" 185 | \n",
"
\n",
" \n",
" 309 | \n",
" 11 | \n",
" District of Columbia | \n",
" Total occupied units | \n",
" 263,650 | \n",
" 965 | \n",
" 1588 | \n",
"
\n",
" \n",
" 310 | \n",
" 11 | \n",
" District of Columbia | \n",
" Owner occupied units (TEN in 1,2) | \n",
" 110,362 | \n",
" 1363 | \n",
" 2242 | \n",
"
\n",
" \n",
" 311 | \n",
" 11 | \n",
" District of Columbia | \n",
" Renter occupied units (TEN in 3,4) | \n",
" 153,288 | \n",
" 1486 | \n",
" 2444 | \n",
"
\n",
" \n",
" 312 | \n",
" 11 | \n",
" District of Columbia | \n",
" Owned with a mortgage (TEN=1) | \n",
" 85,483 | \n",
" 1208 | \n",
" 1988 | \n",
"
\n",
" \n",
" 313 | \n",
" 11 | \n",
" District of Columbia | \n",
" Owned free and clear (TEN=2) | \n",
" 24,879 | \n",
" 565 | \n",
" 929 | \n",
"
\n",
" \n",
" 314 | \n",
" 11 | \n",
" District of Columbia | \n",
" Rented for cash (TEN=3) | \n",
" 149,500 | \n",
" 1511 | \n",
" 2485 | \n",
"
\n",
" \n",
" 315 | \n",
" 11 | \n",
" District of Columbia | \n",
" No cash rent (TEN=4) | \n",
" 3,788 | \n",
" 262 | \n",
" 431 | \n",
"
\n",
" \n",
" 316 | \n",
" 11 | \n",
" District of Columbia | \n",
" Total vacant units | \n",
" 34,677 | \n",
" 920 | \n",
" 1514 | \n",
"
\n",
" \n",
" 317 | \n",
" 11 | \n",
" District of Columbia | \n",
" For rent (VACS=1) | \n",
" 10,686 | \n",
" 618 | \n",
" 1017 | \n",
"
\n",
" \n",
" 318 | \n",
" 11 | \n",
" District of Columbia | \n",
" For sale only (VACS=3) | \n",
" 2,953 | \n",
" 325 | \n",
" 534 | \n",
"
\n",
" \n",
" 319 | \n",
" 11 | \n",
" District of Columbia | \n",
" All Other Vacant (VACS in 2,4,5,6,7) | \n",
" 21,038 | \n",
" 849 | \n",
" 1397 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" st state characteristic \\\n",
"288 11 District of Columbia Total population \n",
"289 11 District of Columbia Housing unit population (RELP=0-15) \n",
"290 11 District of Columbia GQ population (RELP=16-17) \n",
"291 11 District of Columbia GQ institutional population (RELP=16) \n",
"292 11 District of Columbia GQ noninstitutional population (RELP=17) \n",
"293 11 District of Columbia Total males (SEX=1) \n",
"294 11 District of Columbia Total females (SEX=2) \n",
"295 11 District of Columbia Age 0-4 \n",
"296 11 District of Columbia Age 5-9 \n",
"297 11 District of Columbia Age 10-14 \n",
"298 11 District of Columbia Age 15-19 \n",
"299 11 District of Columbia Age 20-24 \n",
"300 11 District of Columbia Age 25-34 \n",
"301 11 District of Columbia Age 35-44 \n",
"302 11 District of Columbia Age 45-54 \n",
"303 11 District of Columbia Age 55-59 \n",
"304 11 District of Columbia Age 60-64 \n",
"305 11 District of Columbia Age 65-74 \n",
"306 11 District of Columbia Age 75-84 \n",
"307 11 District of Columbia Age 85 and over \n",
"308 11 District of Columbia Total housing units (TYPE=1) \n",
"309 11 District of Columbia Total occupied units \n",
"310 11 District of Columbia Owner occupied units (TEN in 1,2) \n",
"311 11 District of Columbia Renter occupied units (TEN in 3,4) \n",
"312 11 District of Columbia Owned with a mortgage (TEN=1) \n",
"313 11 District of Columbia Owned free and clear (TEN=2) \n",
"314 11 District of Columbia Rented for cash (TEN=3) \n",
"315 11 District of Columbia No cash rent (TEN=4) \n",
"316 11 District of Columbia Total vacant units \n",
"317 11 District of Columbia For rent (VACS=1) \n",
"318 11 District of Columbia For sale only (VACS=3) \n",
"319 11 District of Columbia All Other Vacant (VACS in 2,4,5,6,7) \n",
"\n",
" pums_est_09_to_13 pums_se_09_to_13 pums_moe_09_to_13 \n",
"288 619,371 0 0 \n",
"289 579,281 0 0 \n",
"290 40,090 0 0 \n",
"291 7,443 80 132 \n",
"292 32,647 80 132 \n",
"293 292,566 361 595 \n",
"294 326,805 361 595 \n",
"295 36,530 253 417 \n",
"296 27,658 636 1046 \n",
"297 24,621 598 984 \n",
"298 40,950 825 1357 \n",
"299 58,828 779 1281 \n",
"300 134,025 526 865 \n",
"301 84,310 534 878 \n",
"302 75,981 435 716 \n",
"303 35,191 599 985 \n",
"304 31,070 590 970 \n",
"305 38,245 295 485 \n",
"306 22,283 420 690 \n",
"307 9,679 377 619 \n",
"308 298,327 113 185 \n",
"309 263,650 965 1588 \n",
"310 110,362 1363 2242 \n",
"311 153,288 1486 2444 \n",
"312 85,483 1208 1988 \n",
"313 24,879 565 929 \n",
"314 149,500 1511 2485 \n",
"315 3,788 262 431 \n",
"316 34,677 920 1514 \n",
"317 10,686 618 1017 \n",
"318 2,953 325 534 \n",
"319 21,038 849 1397 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"`dfe`: Estimates for user verification filtered for 'District of Columbia'.\")\n",
"# Load the estimates for user verification from the CSV file.\n",
"dfe = pd.read_csv(path_ecsv)\n",
"# Keep only the rows whose 'state' is 'District of Columbia'.\n",
"tfmask_dc = (dfe['state'] == 'District of Columbia')\n",
"dfe_dc = dfe[tfmask_dc]\n",
"# Display the filtered estimates as the cell output.\n",
"dfe_dc"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"`dfe`: Verify characteristic estimates, direct standard errors, and margin of error.\n",
"\n",
"'PERSON RECORD'\n",
" 'Total population'\n",
" 'pums_est_09_to_13': (ref, test) = (619371, 619371)\n",
" 'pums_se_09_to_13' : (ref, test) = (0, 0.0)\n",
" 'pums_moe_09_to_13': (ref, test) = (0, 0.0)\n",
" 'Housing unit population (RELP=0-15)'\n",
" 'pums_est_09_to_13': (ref, test) = (579281, 579281)\n",
" 'pums_se_09_to_13' : (ref, test) = (0, 0.0)\n",
" 'pums_moe_09_to_13': (ref, test) = (0, 0.0)\n",
" 'GQ population (RELP=16-17)'\n",
" 'pums_est_09_to_13': (ref, test) = (40090, 40090)\n",
" 'pums_se_09_to_13' : (ref, test) = (0, 0.0)\n",
" 'pums_moe_09_to_13': (ref, test) = (0, 0.0)\n",
" 'GQ institutional population (RELP=16)'\n",
" 'pums_est_09_to_13': (ref, test) = (7443, 7443)\n",
" 'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184)\n",
" 'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871)\n",
" 'GQ noninstitutional population (RELP=17)'\n",
" 'pums_est_09_to_13': (ref, test) = (32647, 32647)\n",
" 'pums_se_09_to_13' : (ref, test) = (80, 80.30971298666184)\n",
" 'pums_moe_09_to_13': (ref, test) = (132, 132.10947786305871)\n",
" 'Total males (SEX=1)'\n",
" 'pums_est_09_to_13': (ref, test) = (292566, 292566)\n",
" 'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036)\n",
" 'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794)\n",
" 'Total females (SEX=2)'\n",
" 'pums_est_09_to_13': (ref, test) = (326805, 326805)\n",
" 'pums_se_09_to_13' : (ref, test) = (361, 361.4210148843036)\n",
" 'pums_moe_09_to_13': (ref, test) = (595, 594.5375694846794)\n",
" 'Age 0-4'\n",
" 'pums_est_09_to_13': (ref, test) = (36530, 36530)\n",
" 'pums_se_09_to_13' : (ref, test) = (253, 253.37699185206222)\n",
" 'pums_moe_09_to_13': (ref, test) = (417, 416.80515159664236)\n",
" 'Age 5-9'\n",
" 'pums_est_09_to_13': (ref, test) = (27658, 27658)\n",
" 'pums_se_09_to_13' : (ref, test) = (636, 635.5916141674621)\n",
" 'pums_moe_09_to_13': (ref, test) = (1046, 1045.5482053054752)\n",
" 'Age 10-14'\n",
" 'pums_est_09_to_13': (ref, test) = (24621, 24621)\n",
" 'pums_se_09_to_13' : (ref, test) = (598, 598.0936799532328)\n",
" 'pums_moe_09_to_13': (ref, test) = (984, 983.864103523068)\n",
" 'Age 15-19'\n",
" 'pums_est_09_to_13': (ref, test) = (40950, 40950)\n",
" 'pums_se_09_to_13' : (ref, test) = (825, 825.0349386541154)\n",
" 'pums_moe_09_to_13': (ref, test) = (1357, 1357.18247408602)\n",
" 'Age 20-24'\n",
" 'pums_est_09_to_13': (ref, test) = (58828, 58828)\n",
" 'pums_se_09_to_13' : (ref, test) = (779, 778.715930233869)\n",
" 'pums_moe_09_to_13': (ref, test) = (1281, 1280.9877052347144)\n",
" 'Age 25-34'\n",
" 'pums_est_09_to_13': (ref, test) = (134025, 134025)\n",
" 'pums_se_09_to_13' : (ref, test) = (526, 525.9921102069878)\n",
" 'pums_moe_09_to_13': (ref, test) = (865, 865.257021290495)\n",
" 'Age 35-44'\n",
" 'pums_est_09_to_13': (ref, test) = (84310, 84310)\n",
" 'pums_se_09_to_13' : (ref, test) = (534, 533.5205244411877)\n",
" 'pums_moe_09_to_13': (ref, test) = (878, 877.6412627057538)\n",
" 'Age 45-54'\n",
" 'pums_est_09_to_13': (ref, test) = (75981, 75981)\n",
" 'pums_se_09_to_13' : (ref, test) = (435, 435.0808545546448)\n",
" 'pums_moe_09_to_13': (ref, test) = (716, 715.7080057423907)\n",
" 'Age 55-59'\n",
" 'pums_est_09_to_13': (ref, test) = (35191, 35191)\n",
" 'pums_se_09_to_13' : (ref, test) = (599, 598.5786915686191)\n",
" 'pums_moe_09_to_13': (ref, test) = (985, 984.6619476303784)\n",
" 'Age 60-64'\n",
" 'pums_est_09_to_13': (ref, test) = (31070, 31070)\n",
" 'pums_se_09_to_13' : (ref, test) = (590, 589.6810154651412)\n",
" 'pums_moe_09_to_13': (ref, test) = (970, 970.0252704401572)\n",
" 'Age 65-74'\n",
" 'pums_est_09_to_13': (ref, test) = (38245, 38245)\n",
" 'pums_se_09_to_13' : (ref, test) = (295, 295.0997289053313)\n",
" 'pums_moe_09_to_13': (ref, test) = (485, 485.43905404927)\n",
" 'Age 75-84'\n",
" 'pums_est_09_to_13': (ref, test) = (22283, 22283)\n",
" 'pums_se_09_to_13' : (ref, test) = (420, 419.69280432239964)\n",
" 'pums_moe_09_to_13': (ref, test) = (690, 690.3946631103474)\n",
" 'Age 85 and over'\n",
" 'pums_est_09_to_13': (ref, test) = (9679, 9679)\n",
" 'pums_se_09_to_13' : (ref, test) = (377, 376.5637396245156)\n",
" 'pums_moe_09_to_13': (ref, test) = (619, 619.4473516823282)\n",
"'HOUSING RECORD'\n",
" 'Total housing units (TYPE=1)'\n",
" 'pums_est_09_to_13': (ref, test) = (298327, 298327)\n",
" 'pums_se_09_to_13' : (ref, test) = (113, 112.68873058118989)\n",
" 'pums_moe_09_to_13': (ref, test) = (185, 185.37296180605736)\n",
" 'Total occupied units'\n",
" 'pums_est_09_to_13': (ref, test) = (263650, 263650)\n",
" 'pums_se_09_to_13' : (ref, test) = (965, 965.0778984102786)\n",
" 'pums_moe_09_to_13': (ref, test) = (1588, 1587.5531428849083)\n",
" 'Owner occupied units (TEN in 1,2)'\n",
" 'pums_est_09_to_13': (ref, test) = (110362, 110362)\n",
" 'pums_se_09_to_13' : (ref, test) = (1363, 1363.1910174293257)\n",
" 'pums_moe_09_to_13': (ref, test) = (2242, 2242.449223671241)\n",
" 'Renter occupied units (TEN in 3,4)'\n",
" 'pums_est_09_to_13': (ref, test) = (153288, 153288)\n",
" 'pums_se_09_to_13' : (ref, test) = (1486, 1485.6482760061347)\n",
" 'pums_moe_09_to_13': (ref, test) = (2444, 2443.8914140300917)\n",
" 'Owned with a mortgage (TEN=1)'\n",
" 'pums_est_09_to_13': (ref, test) = (85483, 85483)\n",
" 'pums_se_09_to_13' : (ref, test) = (1208, 1208.399126944405)\n",
" 'pums_moe_09_to_13': (ref, test) = (1988, 1987.8165638235462)\n",
" 'Owned free and clear (TEN=2)'\n",
" 'pums_est_09_to_13': (ref, test) = (24879, 24879)\n",
" 'pums_se_09_to_13' : (ref, test) = (565, 565.0110618386157)\n",
" 'pums_moe_09_to_13': (ref, test) = (929, 929.4431967245227)\n",
" 'Rented for cash (TEN=3)'\n",
" 'pums_est_09_to_13': (ref, test) = (149500, 149500)\n",
" 'pums_se_09_to_13' : (ref, test) = (1511, 1510.8262970970554)\n",
" 'pums_moe_09_to_13': (ref, test) = (2485, 2485.309258724656)\n",
" 'No cash rent (TEN=4)'\n",
" 'pums_est_09_to_13': (ref, test) = (3788, 3788)\n",
" 'pums_se_09_to_13' : (ref, test) = (262, 262.1715087495207)\n",
" 'pums_moe_09_to_13': (ref, test) = (431, 431.2721318929615)\n",
" 'Total vacant units'\n",
" 'pums_est_09_to_13': (ref, test) = (34677, 34677)\n",
" 'pums_se_09_to_13' : (ref, test) = (920, 920.3688391074527)\n",
" 'pums_moe_09_to_13': (ref, test) = (1514, 1514.0067403317596)\n",
" 'For rent (VACS=1)'\n",
" 'pums_est_09_to_13': (ref, test) = (10686, 10686)\n",
" 'pums_se_09_to_13' : (ref, test) = (618, 618.3948172486571)\n",
" 'pums_moe_09_to_13': (ref, test) = (1017, 1017.2594743740409)\n",
" 'For sale only (VACS=3)'\n",
" 'pums_est_09_to_13': (ref, test) = (2953, 2953)\n",
" 'pums_se_09_to_13' : (ref, test) = (325, 324.77245880770124)\n",
" 'pums_moe_09_to_13': (ref, test) = (534, 534.2506947386686)\n",
" 'All Other Vacant (VACS in 2,4,5,6,7)'\n",
" 'pums_est_09_to_13': (ref, test) = (21038, 21038)\n",
" 'pums_se_09_to_13' : (ref, test) = (849, 849.4756618055635)\n",
" 'pums_moe_09_to_13': (ref, test) = (1397, 1397.3874636701519)\n"
]
}
],
"source": [
"print(\"`dfe`: Verify characteristic estimates, direct standard errors, and margin of error.\")\n",
"# Verify the estimates following\n",
"# https://www.census.gov/programs-surveys/acs/\n",
"# technical-documentation/pums/documentation.2013.html\n",
"# tech_docs/pums/accuracy/2009_2013AccuracyPUMS.pdf\n",
"print()\n",
"# Scale factor in the direct standard error for the 80 replicate weights.\n",
"se_factor = 4/80\n",
"# Number of standard errors in the margin of error at the 90% confidence level.\n",
"moe_num_se = 1.645\n",
"# Absolute tolerance when comparing calculated values to reference values.\n",
"verify_atol = 1\n",
"\n",
"\n",
"def print_and_verify(label, ref, test):\n",
"    \"\"\"Print `label`, assert `test` is within `verify_atol` of `ref`, then print both.\n",
"\n",
"    Args:\n",
"        label: String printed before the comparison (the reference column name).\n",
"        ref: Reference value from the estimates for user verification.\n",
"        test: Value calculated from the records.\n",
"\n",
"    Raises:\n",
"        AssertionError: If `ref` and `test` differ by more than `verify_atol`.\n",
"    \"\"\"\n",
"    # The label is printed first so that a failed assertion is preceded\n",
"    # by the name of the column that failed.\n",
"    print(label, end=' ')\n",
"    assert np.isclose(ref, test, rtol=0, atol=verify_atol), (\n",
"        \"(ref, test) = {tup} differ by more than {atol}.\".format(\n",
"            tup=(ref, test), atol=verify_atol))\n",
"    print(\"(ref, test) = {tup}\".format(tup=(ref, test)))\n",
"\n",
"\n",
"# Map each characteristic to a function that takes the dataframe for the record type\n",
"# and returns the boolean mask of records with that characteristic.\n",
"# Functions replace strings passed to `eval` so that each mask is built from\n",
"# the dataframe selected in the loop below rather than from global names.\n",
"# Null values never satisfy a range, equality, or membership test.\n",
"tfmask_test_funcs = collections.OrderedDict([\n",
"    ('PERSON RECORD', collections.OrderedDict([\n",
"        ('Total population', lambda df: np.ones(len(df), dtype=bool)),\n",
"        ('Housing unit population (RELP=0-15)', lambda df: df['RELP'].between(0, 15)),\n",
"        ('GQ population (RELP=16-17)', lambda df: df['RELP'].between(16, 17)),\n",
"        ('GQ institutional population (RELP=16)', lambda df: df['RELP'] == 16),\n",
"        ('GQ noninstitutional population (RELP=17)', lambda df: df['RELP'] == 17),\n",
"        ('Total males (SEX=1)', lambda df: df['SEX'] == 1),\n",
"        ('Total females (SEX=2)', lambda df: df['SEX'] == 2),\n",
"        ('Age 0-4', lambda df: df['AGEP'].between(0, 4)),\n",
"        ('Age 5-9', lambda df: df['AGEP'].between(5, 9)),\n",
"        ('Age 10-14', lambda df: df['AGEP'].between(10, 14)),\n",
"        ('Age 15-19', lambda df: df['AGEP'].between(15, 19)),\n",
"        ('Age 20-24', lambda df: df['AGEP'].between(20, 24)),\n",
"        ('Age 25-34', lambda df: df['AGEP'].between(25, 34)),\n",
"        ('Age 35-44', lambda df: df['AGEP'].between(35, 44)),\n",
"        ('Age 45-54', lambda df: df['AGEP'].between(45, 54)),\n",
"        ('Age 55-59', lambda df: df['AGEP'].between(55, 59)),\n",
"        ('Age 60-64', lambda df: df['AGEP'].between(60, 64)),\n",
"        ('Age 65-74', lambda df: df['AGEP'].between(65, 74)),\n",
"        ('Age 75-84', lambda df: df['AGEP'].between(75, 84)),\n",
"        ('Age 85 and over', lambda df: df['AGEP'] >= 85)])),\n",
"    ('HOUSING RECORD', collections.OrderedDict([\n",
"        ('Total housing units (TYPE=1)', lambda df: df['TYPE'] == 1),\n",
"        ('Total occupied units', lambda df: df['TEN'].notnull()),\n",
"        ('Owner occupied units (TEN in 1,2)', lambda df: df['TEN'].isin([1, 2])),\n",
"        ('Renter occupied units (TEN in 3,4)', lambda df: df['TEN'].isin([3, 4])),\n",
"        ('Owned with a mortgage (TEN=1)', lambda df: df['TEN'] == 1),\n",
"        ('Owned free and clear (TEN=2)', lambda df: df['TEN'] == 2),\n",
"        ('Rented for cash (TEN=3)', lambda df: df['TEN'] == 3),\n",
"        ('No cash rent (TEN=4)', lambda df: df['TEN'] == 4),\n",
"        ('Total vacant units', lambda df: df['TEN'].isnull()),\n",
"        ('For rent (VACS=1)', lambda df: df['VACS'] == 1),\n",
"        ('For sale only (VACS=3)', lambda df: df['VACS'] == 3),\n",
"        ('All Other Vacant (VACS in 2,4,5,6,7)',\n",
"         lambda df: df['VACS'].isin([2, 4, 5, 6, 7]))]))])\n",
"for record_type in records_dfs:\n",
"    print(\"'{rt}'\".format(rt=record_type))\n",
"    df = records_dfs[record_type]['dataframe']\n",
"    wt = records_dfs[record_type]['weight']\n",
"    wts = records_dfs[record_type]['replicate_weights']\n",
"    for (char, tfmask_test_func) in tfmask_test_funcs[record_type].items():\n",
"        print(\" '{char}'\".format(char=char))\n",
"        # Select the reference verification data\n",
"        # and the records for the characteristic.\n",
"        tfmask_ref = dfe_dc['characteristic'] == char\n",
"        assert tfmask_ref.any(), (\n",
"            \"No reference row in `dfe_dc` for characteristic '{char}'.\".format(char=char))\n",
"        tfmask_test = tfmask_test_func(df)\n",
"        # Calculate and verify the estimate ('est') for the characteristic.\n",
"        # The estimate is the sum of the sample weights 'WGTP'.\n",
"        # The reference estimate is a string with thousands separators, e.g. '619,371'.\n",
"        col = 'pums_est_09_to_13'\n",
"        ref_est = int(dfe_dc.loc[tfmask_ref, col].values[0].replace(',', ''))\n",
"        test_est = df.loc[tfmask_test, wt].sum()\n",
"        print_and_verify(\" '{col}':\".format(col=col), ref_est, test_est)\n",
"        # Calculate and verify the \"direct standard error\" ('se') of the estimate.\n",
"        # The direct standard error is a modified root-mean-square deviation\n",
"        # using the \"replicate weights\" 'WGTP[1-80]'.\n",
"        col = 'pums_se_09_to_13'\n",
"        ref_se = dfe_dc.loc[tfmask_ref, col].values[0]\n",
"        test_reps = df.loc[tfmask_test, wts].sum() # one estimate per replicate weight\n",
"        test_se = (se_factor*((test_reps - test_est)**2).sum())**0.5\n",
"        print_and_verify(\" '{col}' :\".format(col=col), ref_se, test_se)\n",
"        # Calculate and verify the margin of error ('moe') at the\n",
"        # 90% confidence level (+/- 1.645 standard errors).\n",
"        col = 'pums_moe_09_to_13'\n",
"        ref_moe = dfe_dc.loc[tfmask_ref, col].values[0]\n",
"        test_moe = moe_num_se*test_se\n",
"        print_and_verify(\" '{col}':\".format(col=col), ref_moe, test_moe)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Export ipynb to html"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"jupyter nbconvert --to html --template basic /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python.ipynb --output /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python-basic.html\n",
"\n",
"jupyter nbconvert --to html --template full /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python.ipynb --output /home/samuel_harrold/stharrold.github.io/content/static/20160110-etl-census-with-python/20160110-etl-census-with-python-full.html\n",
"\n"
]
}
],
"source": [
"# Export ipynb to html\n",
"# Convert this notebook once per nbconvert template. Each output file is named\n",
"# after the notebook with the template appended, e.g. '<notebook>-basic.html'.\n",
"path_stem = os.path.splitext(path_ipynb)[0]\n",
"for template in ('basic', 'full'):\n",
"    path_html = '{stem}-{tmpl}.html'.format(stem=path_stem, tmpl=template)\n",
"    cmd = [\n",
"        'jupyter', 'nbconvert',\n",
"        '--to', 'html',\n",
"        '--template', template,\n",
"        path_ipynb,\n",
"        '--output', path_html]\n",
"    print(' '.join(cmd))\n",
"    # `check=True` raises `subprocess.CalledProcessError` if the command fails.\n",
"    subprocess.run(args=cmd, check=True)\n",
"    print()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}