Parse JSON From External URL With Python



Let's get started

In its most basic form.

from requests import get

# `url` stands for whichever endpoint you want to read.
response = get(url)
# Decode the JSON body of the response into Python objects.
data = response.json()

Now let's add a URL and print out the results. Let's use https://jsonplaceholder.typicode.com, as it's free to use.

from requests import get

# Endpoint for a single user record.
url = 'https://jsonplaceholder.typicode.com/users/1'

# Make the request first, then decode its JSON body and show the result.
response = get(url)
data = response.json()
print(data)

We'll get some data which looks something like this.

{'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}}

Let's beautify that so we can easily read it.

{
  'id': 1,
  'name': 'Leanne Graham',
  'username': 'Bret',
  'email': 'Sincere@april.biz',
  'address': {
    'street': 'Kulas Light',
    'suite': 'Apt. 556',
    'city': 'Gwenborough',
    'zipcode': '92998-3874',
    'geo': {
      'lat': '-37.3159',
      'lng': '81.1496'
    }
  },
  'phone': '1-770-736-8031 x56442',
  'website': 'hildegard.org',
  'company': {
    'name': 'Romaguera-Crona',
    'catchPhrase': 'Multi-layered client-server neural-net',
    'bs': 'harness real-time e-markets'
  }
}

Next we'll want to print out specific values.

from requests import get

# Fetch user 1 and decode the JSON body.
data = get('https://jsonplaceholder.typicode.com/users/1').json()

# Only the top-level keys are printed here, one value per line;
# the nested 'address' and 'company' objects are left out.
for key in ('id', 'name', 'username', 'email', 'phone', 'website'):
    print(data[key])

Those are just the top-level values. Now we'll get the rest of them... the nested values.

from requests import get

# Fetch user 1 and decode the JSON body.
data = get('https://jsonplaceholder.typicode.com/users/1').json()

# Top-level values that come before the address.
for key in ('id', 'name', 'username', 'email'):
    print(data[key])

# Nested values: step into 'address', then into its 'geo' object.
address = data['address']
for key in ('street', 'suite', 'city', 'zipcode'):
    print(address[key])

geo = address['geo']
for key in ('lat', 'lng'):
    print(geo[key])

# Back to the top level for the contact details.
for key in ('phone', 'website'):
    print(data[key])

# Nested values under 'company'.
company = data['company']
for key in ('name', 'catchPhrase', 'bs'):
    print(company[key])

There are more entries if we change the URL to get all results (https://jsonplaceholder.typicode.com/users/1 => https://jsonplaceholder.typicode.com/users).

from requests import get

# Request the whole users collection instead of a single record.
response = get('https://jsonplaceholder.typicode.com/users')
datas = response.json()
print(datas)

Which returns...

[{
  'id': 1,
  'name': 'Leanne Graham',
  'username': 'Bret',
  'email': 'Sincere@april.biz',
  'address': {
    'street': 'Kulas Light',
    'suite': 'Apt. 556',
    'city': 'Gwenborough',
    'zipcode': '92998-3874',
    'geo': {
      'lat': '-37.3159',
      'lng': '81.1496'
    }
  },
  'phone': '1-770-736-8031 x56442',
  'website': 'hildegard.org',
  'company': {
    'name': 'Romaguera-Crona',
    'catchPhrase': 'Multi-layered client-server neural-net',
    'bs': 'harness real-time e-markets'
  }
}, {
  'id': 2,
  'name': 'Ervin Howell',
  'username': 'Antonette',
  'email': 'Shanna@melissa.tv',
  'address': {
    'street': 'Victor Plains',
    'suite': 'Suite 879',
    'city': 'Wisokyburgh',
    'zipcode': '90566-7771',
    'geo': {
      'lat': '-43.9509',
      'lng': '-34.4618'
    }
  },
  'phone': '010-692-6593 x09125',
  'website': 'anastasia.net',
  'company': {
    'name': 'Deckow-Crist',
    'catchPhrase': 'Proactive didactic contingency',
    'bs': 'synergize scalable supply-chains'
  }
}, {
  'id': 3,
  'name': 'Clementine Bauch',
  'username': 'Samantha',
  'email': 'Nathan@yesenia.net',
  'address': {
    'street': 'Douglas Extension',
    'suite': 'Suite 847',
    'city': 'McKenziehaven',
    'zipcode': '59590-4157',
    'geo': {
      'lat': '-68.6102',
      'lng': '-47.0653'
    }
  },
  'phone': '1-463-123-4447',
  'website': 'ramiro.info',
  'company': {
    'name': 'Romaguera-Jacobson',
    'catchPhrase': 'Face to face bifurcated interface',
    'bs': 'e-enable strategic applications'
  }
}, {
  'id': 4,
  'name': 'Patricia Lebsack',
  'username': 'Karianne',
  'email': 'Julianne.OConner@kory.org',
  'address': {
    'street': 'Hoeger Mall',
    'suite': 'Apt. 692',
    'city': 'South Elvis',
    'zipcode': '53919-4257',
    'geo': {
      'lat': '29.4572',
      'lng': '-164.2990'
    }
  },
  'phone': '493-170-9623 x156',
  'website': 'kale.biz',
  'company': {
    'name': 'Robel-Corkery',
    'catchPhrase': 'Multi-tiered zero tolerance productivity',
    'bs': 'transition cutting-edge web services'
  }
}, {
  'id': 5,
  'name': 'Chelsey Dietrich',
  'username': 'Kamren',
  'email': 'Lucio_Hettinger@annie.ca',
  'address': {
    'street': 'Skiles Walks',
    'suite': 'Suite 351',
    'city': 'Roscoeview',
    'zipcode': '33263',
    'geo': {
      'lat': '-31.8129',
      'lng': '62.5342'
    }
  },
  'phone': '(254)954-1289',
  'website': 'demarco.info',
  'company': {
    'name': 'Keebler LLC',
    'catchPhrase': 'User-centric fault-tolerant solution',
    'bs': 'revolutionize end-to-end systems'
  }
}, {
  'id': 6,
  'name': 'Mrs. Dennis Schulist',
  'username': 'Leopoldo_Corkery',
  'email': 'Karley_Dach@jasper.info',
  'address': {
    'street': 'Norberto Crossing',
    'suite': 'Apt. 950',
    'city': 'South Christy',
    'zipcode': '23505-1337',
    'geo': {
      'lat': '-71.4197',
      'lng': '71.7478'
    }
  },
  'phone': '1-477-935-8478 x6430',
  'website': 'ola.org',
  'company': {
    'name': 'Considine-Lockman',
    'catchPhrase': 'Synchronised bottom-line interface',
    'bs': 'e-enable innovative applications'
  }
}, {
  'id': 7,
  'name': 'Kurtis Weissnat',
  'username': 'Elwyn.Skiles',
  'email': 'Telly.Hoeger@billy.biz',
  'address': {
    'street': 'Rex Trail',
    'suite': 'Suite 280',
    'city': 'Howemouth',
    'zipcode': '58804-1099',
    'geo': {
      'lat': '24.8918',
      'lng': '21.8984'
    }
  },
  'phone': '210.067.6132',
  'website': 'elvis.io',
  'company': {
    'name': 'Johns Group',
    'catchPhrase': 'Configurable multimedia task-force',
    'bs': 'generate enterprise e-tailers'
  }
}, {
  'id': 8,
  'name': 'Nicholas Runolfsdottir V',
  'username': 'Maxime_Nienow',
  'email': 'Sherwood@rosamond.me',
  'address': {
    'street': 'Ellsworth Summit',
    'suite': 'Suite 729',
    'city': 'Aliyaview',
    'zipcode': '45169',
    'geo': {
      'lat': '-14.3990',
      'lng': '-120.7677'
    }
  },
  'phone': '586.493.6943 x140',
  'website': 'jacynthe.com',
  'company': {
    'name': 'Abernathy Group',
    'catchPhrase': 'Implemented secondary concept',
    'bs': 'e-enable extensible e-tailers'
  }
}, {
  'id': 9,
  'name': 'Glenna Reichert',
  'username': 'Delphine',
  'email': 'Chaim_McDermott@dana.io',
  'address': {
    'street': 'Dayna Park',
    'suite': 'Suite 449',
    'city': 'Bartholomebury',
    'zipcode': '76495-3109',
    'geo': {
      'lat': '24.6463',
      'lng': '-168.8889'
    }
  },
  'phone': '(775)976-6794 x41206',
  'website': 'conrad.com',
  'company': {
    'name': 'Yost and Sons',
    'catchPhrase': 'Switchable contextually-based project',
    'bs': 'aggregate real-time technologies'
  }
}, {
  'id': 10,
  'name': 'Clementina DuBuque',
  'username': 'Moriah.Stanton',
  'email': 'Rey.Padberg@karina.biz',
  'address': {
    'street': 'Kattie Turnpike',
    'suite': 'Suite 198',
    'city': 'Lebsackbury',
    'zipcode': '31428-2261',
    'geo': {
      'lat': '-38.2386',
      'lng': '57.2232'
    }
  },
  'phone': '024-648-3804',
  'website': 'ambrose.net',
  'company': {
    'name': 'Hoeger LLC',
    'catchPhrase': 'Centralized empowering task-force',
    'bs': 'target end-to-end models'
  }
}]

Let's loop over them and print them out.

from requests import get

# Fetch the whole users collection.
datas = get('https://jsonplaceholder.typicode.com/users').json()

# Walk every user record and print each field, one value per line.
for user in datas:
    for key in ('id', 'name', 'username', 'email'):
        print(user[key])

    # Nested values: 'address' and the 'geo' object inside it.
    address = user['address']
    for key in ('street', 'suite', 'city', 'zipcode'):
        print(address[key])

    geo = address['geo']
    for key in ('lat', 'lng'):
        print(geo[key])

    for key in ('phone', 'website'):
        print(user[key])

    # Nested values under 'company'.
    company = user['company']
    for key in ('name', 'catchPhrase', 'bs'):
        print(company[key])

Let's print all these out like this for some reason.

from requests import get

# Fetch the whole users collection.
datas = get('https://jsonplaceholder.typicode.com/users').json()

# Print every field of every user with a label in front of the value.
for user in datas:
    print('id: {}'.format(user['id']))
    print('name: {}'.format(user['name']))
    print('username: {}'.format(user['username']))
    print('email: {}'.format(user['email']))

    # Step into the nested 'address' object once and reuse it.
    address = user['address']
    print('address street: {}'.format(address['street']))
    print('address suite: {}'.format(address['suite']))
    print('address city: {}'.format(address['city']))
    print('address zipcode: {}'.format(address['zipcode']))

    geo = address['geo']
    print('address geo lat: {}'.format(geo['lat']))
    print('address geo lng: {}'.format(geo['lng']))

    print('phone: {}'.format(user['phone']))
    print('website: {}'.format(user['website']))

    company = user['company']
    print('company name: {}'.format(company['name']))
    print('company catchPhrase: {}'.format(company['catchPhrase']))
    # The trailing newline leaves a blank line between users.
    print('company bs: {}\n'.format(company['bs']))

Just for fun

It looks like we have a pile of useless text. Let's go and get as much data as possible from this API and play around with it.

from requests import get
from wordcloud import WordCloud
from PIL import Image
import numpy as np

def get_json(endpoint, timeout=10):
    """Fetch one JSONPlaceholder resource and return its decoded JSON body.

    Args:
        endpoint: Path appended to the API root, e.g. 'users'.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the script forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.Timeout: If no response arrives within
            ``timeout`` seconds.
    """
    response = get(
        'https://jsonplaceholder.typicode.com/{}'.format(endpoint),
        timeout=timeout,
    )
    # Fail loudly on an error status instead of treating the error body as data.
    response.raise_for_status()
    return response.json()

# Collections to pull from the API, in the order they are fetched.
endpoints = ['users', 'posts', 'comments', 'todos', 'photos', 'albums']
# Every harvested piece of text ends up in this flat list.
cloud_fuel = []

for endpoint in endpoints:
    for record in get_json(endpoint):
        if endpoint == 'users':
            address = record['address']
            company = record['company']
            # All the user's text fields; the geo coordinates are not collected.
            cloud_fuel.extend([
                record['name'],
                record['username'],
                record['email'],
                address['street'],
                address['suite'],
                address['city'],
                address['zipcode'],
                record['phone'],
                record['website'],
                company['name'],
                company['catchPhrase'],
                company['bs'],
            ])
        elif endpoint == 'comments':
            cloud_fuel.extend([record['name'], record['email'], record['body']])
        elif endpoint == 'posts':
            cloud_fuel.extend([record['title'], record['body']])
        else:
            # todos, photos and albums: only the title is harvested.
            cloud_fuel.append(record['title'])

# One space-separated string is what gets fed to the word cloud.
fuel = ' '.join(cloud_fuel)

# Build the 1200x1200 solid black mask directly in memory. Saving it to
# cloud.jpg just to read it straight back only added disk I/O; the final
# image is still written to cloud.jpg below.
img = Image.new('RGB', (1200, 1200), color='black')
mask = np.array(img)

# font_path is relative, so nexa.otf is looked up in the working directory.
wc = WordCloud(background_color='#000', max_words=9999,
               font_path='nexa.otf', collocations=False, mask=mask, margin=30)

# Lay out the words from the harvested text and write the picture to disk.
wc.generate(fuel)
wc.to_file('cloud.jpg')

That'll do it I guess.

wordcloud

Resources

Thanks for reading. x