Migration Model Example

References

country-codes.csv contains country codes and miscellaneous metadata on countries. It is sourced from a dataset released by Open Knowledge International under a “Public Domain Dedication and License”. Data from this dataset (namely the official English names of countries) is also used in names_to_codes.csv.

Population.csv appears to come from the CIA World Factbook (?).

UN_GDP.csv is GDP data from the UN.

CIA_Unemployment is unemployment data from the CIA World Factbook.

ucdp-prio-acd-4-2016.csv is data on major conflicts. The data comes from the UCDP/PRIO Armed Conflict Dataset, which can be downloaded from Uppsala University.

mledoze-countries.csv contains standardized information about each country and is also used to determine which countries border each other. The original data is hosted by Mohammed Le Doze on GitHub. It is used under the Open Database License.

attachment.csv

API_SM.POP.NETM_DS2_en_csv_v2.csv contains net migration data on each country collected every five years. The data comes from the World Bank under their Terms of Use.

API_SL.UEM.TOTL.ZS_DS2_en_csv_v2.csv (currently unused) comes from the World Bank.

FertilityRate.csv (unused)

Mainconflicttable.csv (unused)

MarriageAge.csv (unused)

Neighbors.csv (unused)

Pop_Women_vs_Men.csv (unused)

Code

This documents and explains the migration model.

import numpy as np
import pandas as pd
import data as data
from constants import POPULATION_SCALE, MIGRATION_THRESHOLD, PROCESSES, SPLITS, BRAIN_DRAIN_THRESHOLD
from gos import Globe
import sys

# The attributes for each agent.
# Column labels of the DataFrame form of the agent table; the migrate_*
# functions below address agents through these labels.
world_columns = ["Country", "Income", "High Income", "Employed", "Attachment",
                 "Location", "Neighborhood", "Migration"]

# Structured-array layout of the same attributes, used by generate_agents.
# The builtins `object` and `bool` are used instead of the `np.object` /
# `np.bool` aliases, which were deprecated in NumPy 1.20 and removed in 1.24.
agentdt = np.dtype([('country', object),
                    ('income', np.float32),
                    ('high income', bool),
                    ('employed', bool),
                    ('attachment', np.float32),
                    ('location', object),
                    ('neighborhood', np.uint8),
                    ('migration', np.float32)])

def generate_agents(df, country, population):
    """
    Generate the agents for a country as a NumPy structured array.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-country data indexed by country. The "GDP", "Unemployment"
        and "Fertility" columns are read.
    country : object
        Index label in ``df`` of the country to generate agents for.
    population : int
        Number of agents to create.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(population, 1)`` with dtype ``agentdt``. Every
        agent starts with ``location`` equal to ``country`` and a
        ``migration`` score of 0.
    """
    # Turn this on for truly random output from each process.
    # pid = mp.current_process()._identity[0]
    # A fresh generator with a fixed seed is created on every call, so each
    # call draws the same underlying random sequences.
    rand = np.random.mtrand.RandomState(0)
    shape = (population, 1)
    country_data = df[df.index == country].to_dict("records")[0]
    gdp = country_data["GDP"]

    # Income: chi-square (10 degrees of freedom) draws scaled by GDP / 10.
    income_array = gdp / 10 * rand.chisquare(10, shape).astype('float32')

    # "Unemployment" is divided by 100 to obtain a probability.
    unemployment_rate = float(country_data["Unemployment"] / 100.0)
    employment_array = rand.choice([True, False], shape,
                                   p=[1 - unemployment_rate, unemployment_rate])

    # Attachment: triangular(0, 0.5, 1) draw scaled by the country's
    # fertility relative to the largest fertility in df.
    attachment_array = (country_data["Fertility"] *
                        rand.triangular(0.0, 0.5, 1.0, shape) /
                        df["Fertility"].max()).astype('float32')

    frame = np.empty([population, 1], dtype=agentdt, order='F')
    frame["country"] = country
    frame["income"] = income_array
    frame["high income"] = income_array > gdp * BRAIN_DRAIN_THRESHOLD
    frame["employed"] = employment_array.astype('bool')
    frame["attachment"] = attachment_array
    frame["location"] = frame["country"]
    # NOTE: drawn from NumPy's global generator, not from `rand`.
    frame["neighborhood"] = np.random.randint(10, size=shape).astype('uint8')
    # np.empty leaves numeric fields uninitialised; give every agent a
    # defined starting migration score instead of arbitrary memory contents.
    frame["migration"] = 0
    return frame


def migrate_array(a, **kwargs):
    """
    Pick a new location for every agent whose migration score exceeds
    MIGRATION_THRESHOLD and return the updated "Location" column.

    Parameters
    ----------
    a : pandas.DataFrame
        Agents with at least "Location" and "Migration" columns. Its
        "Location" column is updated in place.
    **kwargs
        migration_map : mapping from a country to the attraction weights
            of every destination, in the same order as ``countries``.
        countries : sequence of candidate destination countries.
        Both are only read when at least one agent migrates.

    Returns
    -------
    pandas.Series
        ``a.Location`` after the migrants have been relocated.
    """
    migrating = a.Migration > MIGRATION_THRESHOLD
    if not migrating.any():
        return a.Location
    np.random.seed(1000)
    migration_map = kwargs["migration_map"]
    countries = kwargs["countries"]
    # Snapshot the starting locations so agents relocated earlier in the
    # loop are not picked up again under their new location.
    origin = a.Location.copy()
    # a[migrating] is a copy, so iterating it is unaffected by the writes
    # to `a` below.
    for country, movers in a[migrating].groupby("Location"):
        if movers.empty:
            continue
        # Normalise into probabilities without modifying the caller's map.
        weights = migration_map[country]
        weights = weights / weights.sum()
        # Select by current location (the groupby key) so the number of
        # selected rows always equals the number of destinations drawn.
        a.loc[migrating & (origin == country),
              "Location"] = np.random.choice(countries,
                                             p=weights,
                                             size=len(movers),
                                             replace=True)
    return a.Location


def migrate_score(a, **kwargs):
    """
    Compute a migration score for every agent.

    Parameters
    ----------
    a : pandas.DataFrame
        Agents with "Income", "High Income", "Employed", "Attachment" and
        "Location" columns. It is not modified.
    **kwargs
        max_income : value the incomes are divided by.
        conflict : DataFrame with a "Conflict" column, indexed by the
            values found in ``a["Location"]``.
        max_conflict : value the conflict scores are divided by.
        gdp : accepted for compatibility with existing callers; unused.

    Returns
    -------
    pandas.Series
        float32 scores: the weighted sum of the terms below divided by 32,
        the sum of the term weights (10 + 10 + 5 + 3 + 4).
    """
    max_income = kwargs["max_income"]
    conflict_scores = kwargs["conflict"]
    max_conflict = kwargs["max_conflict"]
    # Look up each agent's conflict score through its current location.
    conflict = conflict_scores.merge(a, left_index=True,
                                     right_on='Location')["Conflict"] / max_conflict
    # Brain drain: high-income agents are scored as if their income were 0.
    # The adjusted income is a local value so the caller's frame keeps its
    # real incomes.
    is_high_income = a["High Income"].eq(True)
    income = a["Income"].where(~is_high_income, 0)
    return ((10 * (1 + income / -max_income) +
             10 * a.Attachment +
             (5 * conflict) +
             3 + a.Employed * 4) / 32).astype('float32')


def main(proc=PROCESSES):
    """
    Build the globe, generate its agents and print them.

    Seeds NumPy's global random generator with 1000, constructs a Globe
    from ``data.all()`` using ``proc`` processes and ``SPLITS`` splits,
    creates the agents with ``generate_agents`` and prints ``globe.agents``.

    The scoring, relocation and reporting steps are currently disabled:
    they sit inside the string literal below, which is never executed. As a
    result this function returns None (the ``return globe`` is part of the
    disabled text).
    """
    np.random.seed(1000)
    globe = Globe(data.all(), processes=proc, splits=SPLITS)

    globe.create_agents(generate_agents)
    print(globe.agents)
    # Disabled pipeline (kept as a string literal, not executed): score the
    # agents with migrate_score, build a per-country attraction map, relocate
    # migrants with migrate_array, then print summary statistics.
    """
    globe.agents.Migration = globe.run_par(migrate_score, max_income=globe.agents.Income.max(),
                                           conflict=globe.df[["Conflict"]].sort_index(),
                                           gdp=globe.df[["GDP"]].sort_index(),
                                           max_conflict=globe.df.Conflict.max(),
                                           columns=["Income", "High Income", "Employed", "Attachment", "Location"])
    print("The potential migrants came from")
    migrants = globe.agents[globe.agents.Migration > MIGRATION_THRESHOLD]
    print(migrants.Country.value_counts()[migrants.Country.value_counts().gt(0)])
    attractiveness = ((1 - globe.df["Conflict"] / globe.max_value("Conflict")) +
                      (globe.df["GDP"] / globe.max_value("GDP")) +
                      (1 - globe.df["Unemployment"] / globe.max_value("Unemployment")) +
                      (1 - globe.df["Fertility"] / globe.max_value("Fertility")))

    def neighbors(country):
        return globe.df[globe.df.index == country].iloc[0].neighbors

    migration_map = {}
    for country in globe.df.index:
        local_attraction = attractiveness.copy()
        local_attraction[local_attraction.index.isin(neighbors(country))] += 1
        migration_map[country] = local_attraction

    globe.agents["Location"] = globe.run_par(migrate_array, migration_map=migration_map,
                                             countries=globe.df.index,
                                             columns=["Country", "Location", "Migration"])

    print("Migration model completed at a scale of {}:1.".format(int(1 / POPULATION_SCALE)))
    print("The model used {} child processes".format(globe.processes))
    migrants = globe.agents[globe.agents.Country != globe.agents.Location]
    print("There were a total of {} migrants.".format(len(migrants)))
    print("There were a total of {} agents.".format(len(globe.agents)))
    changes = (globe.agents.Location.value_counts() -
               globe.agents.Country.value_counts()).sort_values()
    print(changes.head())
    print(changes.tail())
    print("The potential migrants came from")
    migrants = globe.agents[globe.agents.Migration > MIGRATION_THRESHOLD]
    print(migrants.Country.value_counts()[migrants.Country.value_counts().gt(0)])
    return globe
    """


if __name__ == "__main__":
    # An optional first command-line argument sets the number of processes;
    # without it main() falls back to its default.
    cli_args = sys.argv[1:]
    if cli_args:
        main(int(cli_args[0]))
    else:
        main()