{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Requests\n", "\n", "This is a markdown chunk." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import requests\n", "\n", "response = requests.get('https://xkcd.com/869')\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", " \n", " \n", " \n", " xkcd: Server Attention Span\n", " \n", " \n", " \n", " \n" ] } ], "source": [ "from bs4 import BeautifulSoup\n", "\n", "doc = BeautifulSoup(response.text, 'lxml')\n", "print('\\n'.join(\n", " doc.prettify().splitlines()[0:10]\n", " ))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"They have to keep the adjacent rack units empty. Otherwise, half the entries in their /var/log/syslog are just 'SERVER BELOW TRYING TO START CONVERSATION *AGAIN*.' and 'WISH THEY'D STOP GIVING HIM SO MUCH COFFEE IT SPLATTERS EVERYWHERE.'\"" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "img = doc.select_one('#comic > img')\n", "img['title']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### HTML Table" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameLabelConceptRequiredAttributesLimitPredicate TypeGroupUnnamed: 8
0AIANHHGeographyNaNnot requiredNaN0(not a predicate)NaNNaN
1AIHHTLGeographyNaNnot requiredNaN0(not a predicate)NaNNaN
2AIRESGeographyNaNnot requiredNaN0(not a predicate)NaNNaN
3ANRCGeographyNaNnot requiredNaN0(not a predicate)NaNNaN
4B00001_001EEstimate!!TotalUNWEIGHTED SAMPLE COUNT OF THE POPULATIONnot requiredB00001_001EA0intB00001NaN
\n", "
" ], "text/plain": [ " Name Label Concept \\\n", "0 AIANHH Geography NaN \n", "1 AIHHTL Geography NaN \n", "2 AIRES Geography NaN \n", "3 ANRC Geography NaN \n", "4 B00001_001E Estimate!!Total UNWEIGHTED SAMPLE COUNT OF THE POPULATION \n", "\n", " Required Attributes Limit Predicate Type Group Unnamed: 8 \n", "0 not required NaN 0 (not a predicate) NaN NaN \n", "1 not required NaN 0 (not a predicate) NaN NaN \n", "2 not required NaN 0 (not a predicate) NaN NaN \n", "3 not required NaN 0 (not a predicate) NaN NaN \n", "4 not required B00001_001EA 0 int B00001 NaN " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "vars = (\n", " pd\n", " .read_html('https://api.census.gov/data/2017/acs/acs5/variables.html')\n", " .pop()\n", ")\n", "vars.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameLabel
11214B19013_001EEstimate!!Median household income in the past ...
11215B19013A_001EEstimate!!Median household income in the past ...
11216B19013B_001EEstimate!!Median household income in the past ...
11217B19013C_001EEstimate!!Median household income in the past ...
11218B19013D_001EEstimate!!Median household income in the past ...
11219B19013E_001EEstimate!!Median household income in the past ...
11220B19013F_001EEstimate!!Median household income in the past ...
11221B19013G_001EEstimate!!Median household income in the past ...
11222B19013H_001EEstimate!!Median household income in the past ...
11223B19013I_001EEstimate!!Median household income in the past ...
11932B19049_001EEstimate!!Median household income in the past ...
11933B19049_002EEstimate!!Median household income in the past ...
11934B19049_003EEstimate!!Median household income in the past ...
11935B19049_004EEstimate!!Median household income in the past ...
11936B19049_005EEstimate!!Median household income in the past ...
19332B25099_001EEstimate!!Median household income!!Total
19333B25099_002EEstimate!!Median household income!!Total!!Medi...
19334B25099_003EEstimate!!Median household income!!Total!!Medi...
19643B25119_001EEstimate!!Median household income in the past ...
19644B25119_002EEstimate!!Median household income in the past ...
19645B25119_003EEstimate!!Median household income in the past ...
\n", "
" ], "text/plain": [ " Name Label\n", "11214 B19013_001E Estimate!!Median household income in the past ...\n", "11215 B19013A_001E Estimate!!Median household income in the past ...\n", "11216 B19013B_001E Estimate!!Median household income in the past ...\n", "11217 B19013C_001E Estimate!!Median household income in the past ...\n", "11218 B19013D_001E Estimate!!Median household income in the past ...\n", "11219 B19013E_001E Estimate!!Median household income in the past ...\n", "11220 B19013F_001E Estimate!!Median household income in the past ...\n", "11221 B19013G_001E Estimate!!Median household income in the past ...\n", "11222 B19013H_001E Estimate!!Median household income in the past ...\n", "11223 B19013I_001E Estimate!!Median household income in the past ...\n", "11932 B19049_001E Estimate!!Median household income in the past ...\n", "11933 B19049_002E Estimate!!Median household income in the past ...\n", "11934 B19049_003E Estimate!!Median household income in the past ...\n", "11935 B19049_004E Estimate!!Median household income in the past ...\n", "11936 B19049_005E Estimate!!Median household income in the past ...\n", "19332 B25099_001E Estimate!!Median household income!!Total\n", "19333 B25099_002E Estimate!!Median household income!!Total!!Medi...\n", "19334 B25099_003E Estimate!!Median household income!!Total!!Medi...\n", "19643 B25119_001E Estimate!!Median household income in the past ...\n", "19644 B25119_002E Estimate!!Median household income in the past ...\n", "19645 B25119_003E Estimate!!Median household income in the past ..." 
] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "idx = (\n", " vars['Label']\n", " .str\n", " .contains(\n", " 'Median household income',\n", " na = False,\n", " )\n", " )\n", "vars.loc[idx, ['Name', 'Label']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Web Services" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = 'https://api.census.gov/data/2017/acs/acs5'\n", "\n", "query = {\n", " 'get': 'NAME,B19013_001E',\n", " 'for': 'tract:*',\n", " 'in': 'state:24',\n", "}\n", "response = requests.get(path, params=query)\n", "response" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'application/json;charset=utf-8'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "response.headers['Content-Type']" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0NAMEB19013_001Estatecountytract
1Census Tract 105.01, Wicomico County, Maryland6865224045010501
2Census Tract 5010.02, Carroll County, Maryland7506924013501002
3Census Tract 5077.04, Carroll County, Maryland8830624013507704
4Census Tract 5061.02, Carroll County, Maryland8481024013506102
5Census Tract 5061.01, Carroll County, Maryland9507524013506101
6Census Tract 5052.06, Carroll County, Maryland9190824013505206
7Census Tract 5052.08, Carroll County, Maryland10611624013505208
8Census Tract 5081.02, Carroll County, Maryland7608324013508102
9Census Tract 5081.01, Carroll County, Maryland8482124013508101
10Census Tract 5090.02, Carroll County, Maryland12425024013509002
11Census Tract 5130.02, Carroll County, Maryland12951924013513002
12Census Tract 5130.01, Carroll County, Maryland10994024013513001
13Census Tract 5090.01, Carroll County, Maryland9936024013509001
14Census Tract 5077.03, Carroll County, Maryland8773324013507703
15Census Tract 5052.05, Carroll County, Maryland11277424013505205
16Census Tract 5052.07, Carroll County, Maryland10457724013505207
17Census Tract 5010.01, Carroll County, Maryland7222224013501001
18Census Tract 8035.21, Prince George's County, ...12412524033803521
19Census Tract 8035.23, Prince George's County, ...12859424033803523
20Census Tract 8005.17, Prince George's County, ...14195724033800517
21Census Tract 8035.20, Prince George's County, ...11947924033803520
22Census Tract 8035.27, Prince George's County, ...8638924033803527
23Census Tract 9900, Anne Arundel County, Maryland-66666666624003990000
24Census Tract 7080.04, Anne Arundel County, Mar...6676924003708004
25Census Tract 7080.01, Anne Arundel County, Mar...10687524003708001
26Census Tract 7022.09, Anne Arundel County, Mar...15194424003702209
27Census Tract 7027.02, Anne Arundel County, Mar...16140624003702702
28Census Tract 7407.02, Anne Arundel County, Mar...10803124003740702
29Census Tract 7022.05, Anne Arundel County, Mar...10479524003702205
..................
1377Census Tract 3038.02, Harford County, Maryland8225024025303802
1378Census Tract 3036.05, Harford County, Maryland6693224025303605
1379Census Tract 3065, Harford County, Maryland7339324025306500
1380Census Tract 8761, St. Mary's County, Maryland10068524037876100
1381Census Tract 8762, St. Mary's County, Maryland9213024037876200
1382Census Tract 8754, St. Mary's County, Maryland9892924037875400
1383Census Tract 8755, St. Mary's County, Maryland10263224037875500
1384Census Tract 8756, St. Mary's County, Maryland9818224037875600
1385Census Tract 8750, St. Mary's County, Maryland9954724037875000
1386Census Tract 8751, St. Mary's County, Maryland8843824037875100
1387Census Tract 8757, St. Mary's County, Maryland9394924037875700
1388Census Tract 8024.07, Prince George's County, ...5171224033802407
1389Census Tract 8010.03, Prince George's County, ...10791724033801003
1390Census Tract 8014.08, Prince George's County, ...6157324033801408
1391Census Tract 8059.06, Prince George's County, ...5578124033805906
1392Census Tract 8013.12, Prince George's County, ...8903624033801312
1393Census Tract 8012.12, Prince George's County, ...10102924033801212
1394Census Tract 8001.08, Prince George's County, ...6319524033800108
1395Census Tract 8048.01, Prince George's County, ...4145524033804801
1396Census Tract 8035.26, Prince George's County, ...8435024033803526
1397Census Tract 4113.09, Baltimore County, Maryland10771824005411309
1398Census Tract 4113.08, Baltimore County, Maryland8991724005411308
1399Census Tract 4301.04, Baltimore County, Maryland7983324005430104
1400Census Tract 7012.18, Montgomery County, Maryland7718824031701218
1401Census Tract 1202.02, Baltimore city, Maryland3655224510120202
1402Census Tract 2720.07, Baltimore city, Maryland4045524510272007
1403Census Tract 2720.05, Baltimore city, Maryland6025024510272005
1404Census Tract 1202.01, Baltimore city, Maryland7362524510120201
1405Census Tract 2720.04, Baltimore city, Maryland3812524510272004
1406Census Tract 2720.06, Baltimore city, Maryland3030424510272006
\n", "

1407 rows × 5 columns

\n", "
" ], "text/plain": [ " 0 1 2 \\\n", "0 NAME B19013_001E state \n", "1 Census Tract 105.01, Wicomico County, Maryland 68652 24 \n", "2 Census Tract 5010.02, Carroll County, Maryland 75069 24 \n", "3 Census Tract 5077.04, Carroll County, Maryland 88306 24 \n", "4 Census Tract 5061.02, Carroll County, Maryland 84810 24 \n", "... ... ... ... \n", "1402 Census Tract 2720.07, Baltimore city, Maryland 40455 24 \n", "1403 Census Tract 2720.05, Baltimore city, Maryland 60250 24 \n", "1404 Census Tract 1202.01, Baltimore city, Maryland 73625 24 \n", "1405 Census Tract 2720.04, Baltimore city, Maryland 38125 24 \n", "1406 Census Tract 2720.06, Baltimore city, Maryland 30304 24 \n", "\n", " 3 4 \n", "0 county tract \n", "1 045 010501 \n", "2 013 501002 \n", "3 013 507704 \n", "4 013 506102 \n", "... ... ... \n", "1402 510 272007 \n", "1403 510 272005 \n", "1404 510 120201 \n", "1405 510 272004 \n", "1406 510 272006 \n", "\n", "[1407 rows x 5 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.read_json(response.content)\n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Specialized Packages" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from census import Census\n", "\n", "key = None\n", "c = Census(key, year=2017)\n", "c.acs5" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "variables = ('NAME', 'B19013_001E')" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'B19013_001E': 68652.0,\n", " 'NAME': 'Census Tract 105.01, Wicomico County, Maryland',\n", " 'county': '045',\n", " 'state': '24',\n", " 'tract': '010501'}" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "response = 
c.acs5.state_county_tract(\n", " variables,\n", " state_fips='24',\n", " county_fips=Census.ALL,\n", " tract=Census.ALL)\n", "response[0]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "df = (\n", " pd\n", " .DataFrame(response)\n", " .query(\"B19013_001E >= 0\")\n", ")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "\n", "sns.boxplot(\n", " data = df,\n", " x = 'county',\n", " y = 'B19013_001E',\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Paging & Stashing\n", "\n", "You would need your own API key saved in an api_key.py file like this:\n", "```\n", "API_KEY = 'your_key'\n", "```" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "%run api_key.py" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "import requests\n", "\n", "api = 'https://api.data.gov/regulations/v3/'\n", "path = 'document.json'\n", "query = {\n", " 'documentId':'DOI-2017-0002-0001',\n", " 'api_key':API_KEY,\n", " }\n", "doc = (\n", " requests\n", " .get(api + path, params=query)\n", " .json()\n", ")" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "query = {\n", " 'dktid': doc['docketId']['value'],\n", " 'dct': 'PS',\n", " 'api_key': API_KEY,\n", " }\n", "path = 'documents.json'\n", "response = requests.get(api + path, params=query)\n", "dkt = response.json()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "from schema import Session, Comment\n", "\n", "session = Session()\n", "engine = session.bind" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "query['rpp'] = 10" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "table = Comment.metadata.tables['comment']\n", "for i in range(0, 15):\n", " \n", " # advance page and query\n", " query['po'] = i * query['rpp']\n", " response = requests.get(api + path, params=query)\n", " page = response.json()\n", " docs = page['documents']\n", " \n", " # save page with session engine\n", " values = [{'comment': doc['commentText']} for doc in docs]\n", " 
insert = table.insert().values(values)\n", " engine.execute(insert)\n" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "df = pd.read_sql_table('comment', engine)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "engine.dispose()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.3" } }, "nbformat": 4, "nbformat_minor": 4 }