We consider data acquired through the API of the National Highway Traffic Safety Administration (NHTSA).
From Wikipedia article on Chevrolet Cobalt: "Faulty ignition switches in the Cobalts, which cut power to the car while in motion, were eventually linked to many crashes resulting in fatalities, starting with a teenager in 2005 who drove her new Cobalt into a tree. The switch continued to be used in the manufacture of the vehicles even after the problem was known to GM. On February 21, 2014, GM recalled over 700,000 Cobalts for issues traceable to the defective ignition switches. In May 2014 the NHTSA fined the company $35 million for failing to recall cars with faulty ignition switches for a decade, despite knowing there was a problem with the switches. Thirteen deaths were linked to the faulty switches during the time the company failed to recall the cars."
Let's have a look at the complaints made about the Chevy Cobalt:
import json
import requests
# Download the complaints filed for the 2005 Chevrolet Cobalt as JSON text.
# The request goes to the same HTTPS host that every other NHTSA request in
# this notebook uses, so all cells talk to one endpoint.
url = 'https://one.nhtsa.gov/webapi/api/Complaints/vehicle/modelyear/2005/make/chevrolet/model/cobalt?format=json'
s = requests.get(url).text
s[0:1000]  # peek at the first 1000 characters of the raw response

# Parse the text; looping over the parsed object prints its top-level keys.
data = json.loads(s)
for dd in data:
    print(dd)

# Inspect the individual fields of the response.
data['Count']
data['Message']
print(len(data['Results']))
print(type(data['Results']))
data['Results'][0]
data['Results'][0]['Summary']
What fraction of 2005 Chevy Cobalt complaints are about steering?
# Tally the complaints in `data` whose summary text contains the keyword
# (a case-sensitive substring test), then report the tally and the total.
string = 'STEER'
count = sum(1 for c in data['Results'] if string in c['Summary'])
print(count ,'of',len(data['Results']),'complaints mentioned',string )
How does that compare to the 2005 Kia Optima?
# Same keyword tally as for the Cobalt, this time for the 2005 Kia Optima.
import json

string = 'STEER'
url = 'https://one.nhtsa.gov/webapi/api/Complaints/vehicle/modelyear/2005/make/kia/model/optima?format=json'

# Fetch the raw JSON text and keep only the list of complaint records.
s = requests.get(url).text
d = json.loads(s)
r = d['Results']

# Count the records whose summary contains the keyword and report the result.
count = sum(1 for c in r if string in c['Summary'])
print(count ,'of',len(r),'complaints mentioned',string )
import requests
def complaintstring(year, make, model):
    """Fetch the NHTSA complaints for one vehicle as raw JSON text.

    Args:
        year: Model year; converted with ``str`` when the URL is built.
        make: Manufacturer name; lower-cased before it is placed in the URL.
        model: Model name; lower-cased before it is placed in the URL.

    Returns:
        The body of the HTTP response as text.

    Raises:
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    from urllib.parse import quote

    # Percent-encode each path segment so that names containing spaces or
    # slashes still produce a well-formed URL. Plain alphanumeric names such
    # as 'chevrolet' or 'cobalt' are left unchanged by the encoding.
    url = (
        'https://one.nhtsa.gov/webapi/api/Complaints/vehicle/modelyear/'
        + quote(str(year), safe='')
        + '/make/' + quote(make.lower(), safe='')
        + '/model/' + quote(model.lower(), safe='')
        + '?format=json'
    )
    # Without a timeout an unresponsive server would block the call forever.
    return requests.get(url, timeout=30).text
# first make the cache folder for the data
import os
cache = 'nhtsa_cache'
# exist_ok=True creates the folder only when it is missing, without a
# separate check-then-create step that could race with another process
os.makedirs(cache, exist_ok=True)

#next select a range of years and the vehicles to study
y0,y1 = 2000,2016
# (make,model,firstyear,lastyear)
cars = [('chevrolet','cobalt',y0,y1),
        ('chevrolet','malibu',y0,y1),
        ('ford','fusion',y0,y1),
        ('honda','civic',y0,y1),
        ('hyundai','sonata',y0,y1),
        ('toyota','corolla',y0,y1)]

# Download the complaints for every car and model year once; a file that is
# already present in the cache folder is reported and not fetched again.
for car in cars:
    for year in range(car[2],car[3]+1): # why the +1?
        cache_name = cache + '/' + car[0] + '_'+ car[1] + '_' + str(year) + '.json'
        if not os.path.exists(cache_name):
            s = complaintstring(year,car[0],car[1])
            # the with-block closes the file even if the write raises
            with open(cache_name,'w') as f:
                f.write(s)
        else:
            print( cache_name, 'already cached' )
%pylab inline
Next we count, for each car and model year, how many complaint summaries mention the keyword 'STEER'.
# Draw one subplot per car: for every model year, the complaints that do not
# mention the keyword are drawn above the axis in blue, and those that do
# mention it are drawn below the axis in red.
import json
keyword = 'STEER'
nc = len(cars)
figure(figsize=(10,1.9*nc))
barwidth = 0.8
ec = 'k'#none'
lw = 0.5
for i,car in enumerate(cars):
    ax = subplot(nc,1,i+1)
    for year in range(car[2],car[3]+1):
        # read the response that the download step stored for this car/year
        cachename = cache + '/' + car[0]+'_'+car[1]+'_'+str(year)+'.json'
        with open(cachename) as f:
            data = json.load(f)
        n = data['Count']# number of complaints
        # count how many complaint summaries mention STEER for each car/year
        m = sum(keyword in c['Summary'] for c in data['Results'])
        if n>0:
            # Centre each bar on its year explicitly. The default value of
            # `align` differs between matplotlib versions, so shifting x by
            # half a bar width only centres the bar on some of them.
            bar(year,n-m,barwidth,align='center',color='#7777ff',edgecolor=ec,linewidth=lw)
            bar(year,-m ,barwidth,align='center',color='#ff4444',edgecolor=ec,linewidth=lw)
    title(car[0].title()+' '+car[1].title(),x = 0.8,y=0.8)
    xlim(y0-barwidth,y1+barwidth)
    # only the bottom subplot keeps its year tick labels
    if i<nc-1: xticks([])
suptitle('NHTSA Complaints with those mentioning '+keyword+' below the axis in red')
savefig('nhtsa_'+keyword+'.png')
savefig('nhtsa_'+keyword+'.svg')

# first complaint record of the last file that was loaded above
data['Results'][0]
# For each model year of the Kia Optima, count the complaints whose 'Fire'
# field contains 'Yes' and print the tally next to the total for that year.
import json  # imported once here instead of on every loop iteration

keyword = 'STEER'  # not used by the fire tally below
make = 'kia'
model = 'optima'
for year in range(2000,2018+1):
    # Use the notebook's shared download helper, which builds this same URL,
    # instead of assembling the request by hand a second time.
    s = complaintstring(year, make, model)
    d = json.loads(s)
    r = d['Results']
    count = sum(1 for c in r if 'Yes' in c['Fire'])
    print(count ,'of',len(r),'complaints mentioned fire in ' + str(year) )
import datetime

# Look at the incident date of the first record in `r`: show the raw field,
# then characters 6-16 of it as a number, then that number as a datetime.
# NOTE(review): the slice is presumably the epoch-seconds part of the field,
# since it is passed to fromtimestamp; confirm against the API's date format.
raw_date = r[0]['DateofIncident']
print(raw_date)
epoch_digits = raw_date[6:16]
print(float(epoch_digits))
epoch_digits
datetime.datetime.fromtimestamp(float(epoch_digits))