# Source code for easymoney.sources.world_bank_interface

# coding: utf-8

"""

    World Bank Group Data
    ~~~~~~~~~~~~~~~~~~~~~

"""
# Imports
import re
import wbdata
import numpy as np
import pandas as pd

from easymoney.easy_pandas import twoD_nested_dict


def _wb_rowwise_extractor(wb_row, dict_keys):
    """

    Flatten a single record of the raw World Bank download into a one-level dict.

    The first five entries of ``dict_keys`` are used, in order, as the names for:
    ``wb_row['country']['value']``, ``wb_row['country']['id']``,
    ``wb_row['indicator']['value']``, ``wb_row['value']`` and ``wb_row['date']``.
    Any further entries of ``dict_keys`` are kept in the result, mapped to ``None``.

    :param wb_row: one record of the raw World Bank data.
    :type wb_row: dict
    :param dict_keys: names to use as keys of the returned dict (at least five).
    :type dict_keys: list
    :return: dict keyed by ``dict_keys`` holding the values extracted from ``wb_row``.
    :rtype: dict
    """
    country_info = wb_row['country']

    # Values in the same order as the key names they will be stored under
    field_values = (
        country_info['value'],
        country_info['id'],
        wb_row['indicator']['value'],
        wb_row['value'],
        wb_row['date'],
    )

    # Start with every requested key present (mapped to None), then fill in by position
    record = dict.fromkeys(dict_keys)
    for position, field_value in enumerate(field_values):
        record[dict_keys[position]] = field_value

    return record


def _wb_pull(dict_keys, raw_data):
    """

    Parse the information pulled from the World Bank's API into a DataFrame.

    :param dict_keys: column names for the resulting DataFrame; passed on to
                      ``_wb_rowwise_extractor`` for every record. The result is sorted
                      on the 'Alpha2' and 'Year' columns, so both must be present.
    :type dict_keys: list
    :param raw_data: iterable of raw records, as downloaded from the World Bank.
    :type raw_data: iterable of dict
    :return: DataFrame with the requested indicator information, sorted by 'Alpha2'
             (ascending) and then 'Year' (descending). If ``raw_data`` is empty, an empty
             DataFrame with ``dict_keys`` as its columns is returned.
    :rtype: Pandas Dataframe
    """
    # Generate a list populated with dicts for each row of the raw data
    rows = [_wb_rowwise_extractor(w, dict_keys) for w in raw_data]

    # Nothing was downloaded: hand back an empty frame that still carries the expected
    # columns rather than failing on ``rows[0]`` below.
    if not rows:
        return pd.DataFrame(columns=list(dict_keys))

    # Merge by key (every row shares the keys of the first one)
    merged_dict = {k: [row[k] for row in rows] for k in rows[0]}

    # Merge into a single data frame
    data_frame = pd.DataFrame.from_dict(merged_dict)

    # Sort by region (A -> Z) and year (most recent first)
    return data_frame.sort_values(['Alpha2', 'Year'], ascending=[True, False]).reset_index(drop=True)


def world_bank_pull(value_true_name=None, indicator="FP.CPI.TOTL", return_as='data_frame'):
    """

    | Tool to harvest data for a specific indicator from the World Bank Group via their generously provided API.
    | Extracts World Bank information based on a specific indicator and returns a Pandas DataFrame.
    | Currently, this tool expects the following in the XML data: country, ISO alpha 2 code, an indicator, value name (to be replaced by value_true_name) and year.
    | Please do not write procedures that slam their servers.
    |
    | Acknowledgement: this tool is made possible by the `wbdata` package.¹
    |
    | ¹Sherouse, Oliver (2014). Wbdata. Arlington, VA.
    |

    :param value_true_name: readable name for the indicator; used as the name of the value column.
                            If None, this information will be extracted from ``indicator``
                            (its second dot-separated part, e.g. "CPI" for "FP.CPI.TOTL").
                            Defaults to None.
    :type value_true_name: ``str``
    :param indicator: World Bank Indicator. Defaults to "FP.CPI.TOTL".
    :type indicator: ``str``
    :param return_as: 'data_frame', 'dict' or 'both'. Defaults to 'data_frame'.
    :type return_as: ``str``
    :return: DataFrame with the requested indicator information (``'data_frame'``), the result of
             ``twoD_nested_dict`` for that DataFrame (``'dict'``), or a tuple of the two (``'both'``).
    :rtype: ``dict``, ``Pandas DataFrame`` or ``tuple``
    :raises ValueError: if ``return_as`` is not one of the options listed above.
    """
    # Validate up front so that an invalid option does not cost a download
    if return_as not in ('data_frame', 'dict', 'both'):
        raise ValueError("Invalid option passed to `return_as`.")

    raw_data = wbdata.get_data(indicator)

    # Use the caller's name when given; otherwise derive one from the indicator code
    if value_true_name is not None:
        readable_name = value_true_name
    else:
        indicator_parts = indicator.split(".")
        # Fall back to the whole code if it has no dot-separated second part
        readable_name = indicator_parts[1] if len(indicator_parts) > 1 else indicator

    dict_keys = ['Country', 'Alpha2', 'Indicator', readable_name, 'Year']

    # Convert to DataFrame
    df = _wb_pull(dict_keys, raw_data)

    # Fill Empty Values with NaNs, then drop rows missing the year, the region or the value
    df = df.fillna(value=np.nan).dropna(subset=['Year', 'Alpha2', readable_name]).reset_index(drop=True)

    if return_as == 'data_frame':
        return df

    # Project helper; presumably nests the value column by 'Year' then 'Alpha2' -- confirm in easy_pandas
    nested = twoD_nested_dict(df, 'Year', 'Alpha2', readable_name)
    return nested if return_as == 'dict' else (df, nested)