
import pandas as pd
# Read the basket sheet; each row is one basket and each product is a 0/1 column.
shopping = pd.read_excel('ShoppingBaskets.xls')
# The basket number is only an identifier, so it is removed before mining.
shopping_data = shopping.drop(columns=['BasketNo'])
shopping_data.head()
| ThinkPad X220 | Asus EeePC | HP Laserjet P2055 | 2 GB DDR3 RAM | 8 GB DDR3 RAM | Lenovo Tablet Sleeve | Netbook-Schutzhülle | HP CE50 Toner | LT Laser Maus | LT Minimaus | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 |
| 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
| 2 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 |
| 3 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 |
| 4 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 |
from orangecontrib.associate.fpgrowth import *
# Mine the frequent itemsets with a minimum support of 0.20.
# Take care to pass the raw array (.values), not the DataFrame itself.
itemsets = dict(frequent_itemsets(shopping_data.values, 0.20))

# Each itemset holds column indices; map them back to the product names and
# keep the itemset size and its support next to the readable item list.
product_names = shopping_data.columns
rows = [
    (len(members), count, [product_names[idx] for idx in members])
    for members, count in itemsets.items()
]
item_set_table = pd.DataFrame(rows, columns=["size", "support", "items"])
# Most frequent itemsets first.
item_set_table.sort_values(by='support', ascending=False)
| size | support | items | |
|---|---|---|---|
| 1 | 1 | 6 | [Asus EeePC] |
| 4 | 1 | 5 | [2 GB DDR3 RAM] |
| 5 | 2 | 5 | [Asus EeePC, 2 GB DDR3 RAM] |
| 33 | 1 | 5 | [LT Minimaus] |
| 34 | 2 | 4 | [Asus EeePC, LT Minimaus] |
| 24 | 1 | 4 | [LT Laser Maus] |
| 15 | 3 | 4 | [Asus EeePC, 2 GB DDR3 RAM, Netbook-Schutzhülle ] |
| 14 | 2 | 4 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] |
| 13 | 2 | 4 | [Asus EeePC, Netbook-Schutzhülle ] |
| 12 | 1 | 4 | [Netbook-Schutzhülle ] |
| 0 | 1 | 4 | [ThinkPad X220 ] |
| 8 | 1 | 4 | [Lenovo Tablet Sleeve] |
| 18 | 2 | 3 | [HP Laserjet P2055, HP CE50 Toner] |
| 36 | 3 | 3 | [Asus EeePC, 2 GB DDR3 RAM, LT Minimaus] |
| 38 | 2 | 3 | [LT Minimaus, Netbook-Schutzhülle ] |
| 39 | 3 | 3 | [Asus EeePC, Netbook-Schutzhülle , LT Minimaus] |
| 40 | 3 | 3 | [LT Minimaus, 2 GB DDR3 RAM, Netbook-Schutzhül... |
| 35 | 2 | 3 | [LT Minimaus, 2 GB DDR3 RAM] |
| 41 | 4 | 3 | [Asus EeePC, 2 GB DDR3 RAM, Netbook-Schutzhüll... |
| 2 | 1 | 3 | [HP Laserjet P2055] |
| 16 | 1 | 3 | [HP CE50 Toner] |
| 9 | 2 | 3 | [ThinkPad X220 , Lenovo Tablet Sleeve] |
| 19 | 3 | 2 | [ThinkPad X220 , HP Laserjet P2055, HP CE50 To... |
| 31 | 3 | 2 | [LT Laser Maus, 2 GB DDR3 RAM, Netbook-Schutzh... |
| 3 | 2 | 2 | [ThinkPad X220 , HP Laserjet P2055] |
| 6 | 1 | 2 | [8 GB DDR3 RAM] |
| 37 | 2 | 2 | [LT Minimaus, Lenovo Tablet Sleeve] |
| 7 | 2 | 2 | [ThinkPad X220 , 8 GB DDR3 RAM] |
| 10 | 2 | 2 | [HP Laserjet P2055, Lenovo Tablet Sleeve] |
| 11 | 3 | 2 | [ThinkPad X220 , HP Laserjet P2055, Lenovo Tab... |
| 32 | 4 | 2 | [LT Laser Maus, Asus EeePC, 2 GB DDR3 RAM, Net... |
| 30 | 3 | 2 | [LT Laser Maus, Asus EeePC, Netbook-Schutzhülle ] |
| 20 | 2 | 2 | [Lenovo Tablet Sleeve, HP CE50 Toner] |
| 29 | 2 | 2 | [LT Laser Maus, Netbook-Schutzhülle ] |
| 28 | 3 | 2 | [LT Laser Maus, Asus EeePC, 2 GB DDR3 RAM] |
| 27 | 2 | 2 | [LT Laser Maus, 2 GB DDR3 RAM] |
| 26 | 2 | 2 | [LT Laser Maus, Asus EeePC] |
| 25 | 2 | 2 | [ThinkPad X220 , LT Laser Maus] |
| 23 | 4 | 2 | [ThinkPad X220 , HP Laserjet P2055, Lenovo Tab... |
| 22 | 3 | 2 | [HP Laserjet P2055, Lenovo Tablet Sleeve, HP C... |
| 17 | 2 | 2 | [ThinkPad X220 , HP CE50 Toner] |
| 21 | 3 | 2 | [ThinkPad X220 , Lenovo Tablet Sleeve, HP CE50... |
# Derive association rules from the frequent itemsets (minimum confidence 0.70).
rules = association_rules(itemsets, 0.70)

# rules_stats yields, per rule: premise, conclusion and six quality measures
# (support, confidence, coverage, strength, lift, leverage). The premise and
# conclusion are sets of column indices, translated here to product names.
item_labels = shopping_data.columns
rows = [
    (
        [item_labels[idx] for idx in antecedent],
        [item_labels[idx] for idx in consequent],
        *measures,
    )
    for antecedent, consequent, *measures in rules_stats(rules, itemsets, len(shopping_data))
]
pd.DataFrame(
    rows,
    columns=['Premis', 'Conclusion', 'Support', 'Confidence', 'Coverage', 'Strength', 'Lift', 'Leverage'],
)
| Premis | Conclusion | Support | Confidence | Coverage | Strength | Lift | Leverage | |
|---|---|---|---|---|---|---|---|---|
| 0 | [HP Laserjet P2055, Lenovo Tablet Sleeve, HP C... | [ThinkPad X220 ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 1 | [ThinkPad X220 , Lenovo Tablet Sleeve, HP CE50... | [HP Laserjet P2055] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
| 2 | [Lenovo Tablet Sleeve, HP CE50 Toner] | [ThinkPad X220 , HP Laserjet P2055] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
| 3 | [ThinkPad X220 , HP Laserjet P2055, HP CE50 To... | [Lenovo Tablet Sleeve] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 4 | [ThinkPad X220 , HP CE50 Toner] | [HP Laserjet P2055, Lenovo Tablet Sleeve] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
| 5 | [ThinkPad X220 , HP Laserjet P2055, Lenovo Tab... | [HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
| 6 | [HP Laserjet P2055, Lenovo Tablet Sleeve] | [ThinkPad X220 , HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
| 7 | [ThinkPad X220 , HP Laserjet P2055] | [Lenovo Tablet Sleeve, HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
| 8 | [LT Laser Maus, 2 GB DDR3 RAM, Netbook-Schutzh... | [Asus EeePC] | 2 | 1.000000 | 0.2 | 3.000000 | 1.666667 | 0.08 |
| 9 | [LT Laser Maus, Asus EeePC, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
| 10 | [LT Laser Maus, Netbook-Schutzhülle ] | [Asus EeePC, 2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
| 11 | [LT Laser Maus, Asus EeePC, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 12 | [LT Laser Maus, 2 GB DDR3 RAM] | [Asus EeePC, Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 13 | [LT Laser Maus, Asus EeePC] | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 14 | [LT Minimaus, 2 GB DDR3 RAM, Netbook-Schutzhül... | [Asus EeePC] | 3 | 1.000000 | 0.3 | 2.000000 | 1.666667 | 0.12 |
| 15 | [Asus EeePC, Netbook-Schutzhülle , LT Minimaus] | [2 GB DDR3 RAM] | 3 | 1.000000 | 0.3 | 1.666667 | 2.000000 | 0.15 |
| 16 | [LT Minimaus, Netbook-Schutzhülle ] | [Asus EeePC, 2 GB DDR3 RAM] | 3 | 1.000000 | 0.3 | 1.666667 | 2.000000 | 0.15 |
| 17 | [Asus EeePC, 2 GB DDR3 RAM, LT Minimaus] | [Netbook-Schutzhülle ] | 3 | 1.000000 | 0.3 | 1.333333 | 2.500000 | 0.18 |
| 18 | [LT Minimaus, 2 GB DDR3 RAM] | [Asus EeePC, Netbook-Schutzhülle ] | 3 | 1.000000 | 0.3 | 1.333333 | 2.500000 | 0.18 |
| 19 | [Asus EeePC, LT Minimaus] | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
| 20 | [Asus EeePC, 2 GB DDR3 RAM, Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
| 21 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | [LT Minimaus, Asus EeePC] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
| 22 | [Asus EeePC, Netbook-Schutzhülle ] | [LT Minimaus, 2 GB DDR3 RAM] | 3 | 0.750000 | 0.4 | 0.750000 | 2.500000 | 0.18 |
| 23 | [Netbook-Schutzhülle ] | [LT Minimaus, 2 GB DDR3 RAM, Asus EeePC] | 3 | 0.750000 | 0.4 | 0.750000 | 2.500000 | 0.18 |
| 24 | [HP Laserjet P2055, Lenovo Tablet Sleeve] | [ThinkPad X220 ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 25 | [ThinkPad X220 , HP Laserjet P2055] | [Lenovo Tablet Sleeve] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 26 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | [Asus EeePC] | 4 | 1.000000 | 0.4 | 1.500000 | 1.666667 | 0.16 |
| 27 | [Asus EeePC, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 4 | 1.000000 | 0.4 | 1.250000 | 2.000000 | 0.20 |
| 28 | [Netbook-Schutzhülle ] | [Asus EeePC, 2 GB DDR3 RAM] | 4 | 1.000000 | 0.4 | 1.250000 | 2.000000 | 0.20 |
| 29 | [Asus EeePC, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 4 | 0.800000 | 0.5 | 0.800000 | 2.000000 | 0.20 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 35 | [Lenovo Tablet Sleeve, HP CE50 Toner] | [HP Laserjet P2055] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
| 36 | [HP Laserjet P2055, Lenovo Tablet Sleeve] | [HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
| 37 | [LT Laser Maus, 2 GB DDR3 RAM] | [Asus EeePC] | 2 | 1.000000 | 0.2 | 3.000000 | 1.666667 | 0.08 |
| 38 | [LT Laser Maus, Asus EeePC] | [2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
| 39 | [LT Laser Maus, Netbook-Schutzhülle ] | [Asus EeePC] | 2 | 1.000000 | 0.2 | 3.000000 | 1.666667 | 0.08 |
| 40 | [LT Laser Maus, Asus EeePC] | [Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 41 | [LT Laser Maus, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
| 42 | [LT Laser Maus, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 43 | [LT Minimaus, 2 GB DDR3 RAM] | [Asus EeePC] | 3 | 1.000000 | 0.3 | 2.000000 | 1.666667 | 0.12 |
| 44 | [Asus EeePC, LT Minimaus] | [2 GB DDR3 RAM] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
| 45 | [LT Minimaus, Netbook-Schutzhülle ] | [Asus EeePC] | 3 | 1.000000 | 0.3 | 2.000000 | 1.666667 | 0.12 |
| 46 | [Asus EeePC, LT Minimaus] | [Netbook-Schutzhülle ] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
| 47 | [Asus EeePC, Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
| 48 | [Netbook-Schutzhülle ] | [LT Minimaus, Asus EeePC] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
| 49 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
| 50 | [Netbook-Schutzhülle ] | [LT Minimaus, 2 GB DDR3 RAM] | 3 | 0.750000 | 0.4 | 0.750000 | 2.500000 | 0.18 |
| 51 | [LT Minimaus, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 3 | 1.000000 | 0.3 | 1.666667 | 2.000000 | 0.15 |
| 52 | [LT Minimaus, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 3 | 1.000000 | 0.3 | 1.333333 | 2.500000 | 0.18 |
| 53 | [2 GB DDR3 RAM] | [Asus EeePC] | 5 | 1.000000 | 0.5 | 1.200000 | 1.666667 | 0.20 |
| 54 | [Asus EeePC] | [2 GB DDR3 RAM] | 5 | 0.833333 | 0.6 | 0.833333 | 1.666667 | 0.20 |
| 55 | [8 GB DDR3 RAM] | [ThinkPad X220 ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
| 56 | [Lenovo Tablet Sleeve] | [ThinkPad X220 ] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
| 57 | [ThinkPad X220 ] | [Lenovo Tablet Sleeve] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
| 58 | [Netbook-Schutzhülle ] | [Asus EeePC] | 4 | 1.000000 | 0.4 | 1.500000 | 1.666667 | 0.16 |
| 59 | [Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 4 | 1.000000 | 0.4 | 1.250000 | 2.000000 | 0.20 |
| 60 | [2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 4 | 0.800000 | 0.5 | 0.800000 | 2.000000 | 0.20 |
| 61 | [HP CE50 Toner] | [HP Laserjet P2055] | 3 | 1.000000 | 0.3 | 1.000000 | 3.333333 | 0.21 |
| 62 | [HP Laserjet P2055] | [HP CE50 Toner] | 3 | 1.000000 | 0.3 | 1.000000 | 3.333333 | 0.21 |
| 63 | [LT Minimaus] | [Asus EeePC] | 4 | 0.800000 | 0.5 | 1.200000 | 1.333333 | 0.10 |
| 64 | [Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
65 rows × 8 columns
from scipy.io import arff
# Parse the ARFF file. The context manager guarantees the file handle is
# closed once loading finishes (the bare open() call leaked it).
with open('adult-dataset-tweaked.arff', 'r') as arff_file:
    adult_arff_data, adult_arff_meta = arff.loadarff(arff_file)
adult = pd.DataFrame(adult_arff_data)
# Cells that expose .decode (byte strings produced by the ARFF reader) are
# converted to str; all other cells (the numeric ones) pass through unchanged.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; kept here for compatibility with older pandas versions.
adult = adult.applymap(lambda x: x.decode('utf8') if hasattr(x, 'decode') else x)
adult.head()
| age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | X | 0.0 | 0.0 | 40.0 | United-States | X |
| 1 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 2 | 38.0 | Private | 89814.0 | HS-grad | 9.0 | Married-civ-spouse | Farming-fishing | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
| 3 | 28.0 | Local-gov | 336951.0 | Assoc-acdm | 12.0 | Married-civ-spouse | Protective-serv | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
| 4 | 44.0 | Private | 160323.0 | Some-college | 10.0 | Married-civ-spouse | Machine-op-inspct | Husband | Black | Male | 7688.0 | 0.0 | 40.0 | United-States | >50K |
# Work on a copy so the original `adult` frame keeps its detailed values.
adult_aggregate = adult.copy()
# Collapse the listed post-secondary education levels into one 'Other-Grad' level.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the selected Series: that chained in-place
# form acts on an intermediate object and does not update the DataFrame
# under pandas Copy-on-Write (pandas 2.2 emits a warning for it).
higher_education = ['Bachelors', 'Masters', 'Assoc-acdm', 'Prof-school', 'Assoc-voc', 'Doctorate']
adult_aggregate['education'] = adult_aggregate['education'].replace(higher_education, 'Other-Grad')
adult_aggregate.head()
| age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | X | 0.0 | 0.0 | 40.0 | United-States | X |
| 1 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 2 | 38.0 | Private | 89814.0 | HS-grad | 9.0 | Married-civ-spouse | Farming-fishing | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
| 3 | 28.0 | Local-gov | 336951.0 | Other-Grad | 12.0 | Married-civ-spouse | Protective-serv | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
| 4 | 44.0 | Private | 160323.0 | Some-college | 10.0 | Married-civ-spouse | Machine-op-inspct | Husband | Black | Male | 7688.0 | 0.0 | 40.0 | United-States | >50K |
# Recode every native-country other than 'United-States' as 'Non-US'.
# A single .loc[mask, column] assignment is used rather than chained indexing
# (df[col][mask] = ...), so the write is guaranteed to hit the frame itself.
# NOTE(review): rows whose native-country is '?' also match this mask and are
# turned into 'Non-US' - confirm that this is intended.
is_foreign = adult_aggregate['native-country'] != 'United-States'
adult_aggregate.loc[is_foreign, 'native-country'] = 'Non-US'
adult_aggregate
| age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | X | 0.0 | 0.0 | 40.0 | United-States | X |
| 1 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 2 | 38.0 | Private | 89814.0 | HS-grad | 9.0 | Married-civ-spouse | Farming-fishing | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
| 3 | 28.0 | Local-gov | 336951.0 | Other-Grad | 12.0 | Married-civ-spouse | Protective-serv | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
| 4 | 44.0 | Private | 160323.0 | Some-college | 10.0 | Married-civ-spouse | Machine-op-inspct | Husband | Black | Male | 7688.0 | 0.0 | 40.0 | United-States | >50K |
| 5 | 18.0 | ? | 103497.0 | Some-college | 10.0 | Never-married | ? | Own-child | White | Female | 0.0 | 0.0 | 30.0 | United-States | <=50K |
| 6 | 34.0 | Private | 198693.0 | 10th | 6.0 | Never-married | Other-service | Not-in-family | White | Male | 0.0 | 0.0 | 30.0 | United-States | <=50K |
| 7 | 29.0 | ? | 227026.0 | HS-grad | 9.0 | Never-married | ? | Unmarried | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 8 | 63.0 | Self-emp-not-inc | 104626.0 | Other-Grad | 15.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 3103.0 | 0.0 | 32.0 | United-States | >50K |
| 9 | 24.0 | Private | 369667.0 | Some-college | 10.0 | Never-married | Other-service | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 10 | 55.0 | Private | 104996.0 | 7th-8th | 4.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 10.0 | United-States | <=50K |
| 11 | 65.0 | Private | 184454.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 6418.0 | 0.0 | 40.0 | United-States | >50K |
| 12 | 36.0 | Federal-gov | 212465.0 | Other-Grad | 13.0 | Married-civ-spouse | Adm-clerical | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 13 | 26.0 | Private | 82091.0 | HS-grad | 9.0 | Never-married | Adm-clerical | Not-in-family | White | Female | 0.0 | 0.0 | 39.0 | United-States | <=50K |
| 14 | 58.0 | ? | 299831.0 | HS-grad | 9.0 | Married-civ-spouse | ? | Husband | White | Male | 0.0 | 0.0 | 35.0 | United-States | <=50K |
| 15 | 48.0 | Private | 279724.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 3103.0 | 0.0 | 48.0 | United-States | >50K |
| 16 | 43.0 | Private | 346189.0 | Other-Grad | 14.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | >50K |
| 17 | 20.0 | State-gov | 444554.0 | Some-college | 10.0 | Never-married | Other-service | Own-child | White | Male | 0.0 | 0.0 | 25.0 | United-States | <=50K |
| 18 | 43.0 | Private | 128354.0 | HS-grad | 9.0 | Married-civ-spouse | Adm-clerical | Wife | White | Female | 0.0 | 0.0 | 30.0 | United-States | <=50K |
| 19 | 37.0 | Private | 60548.0 | HS-grad | 9.0 | Widowed | Machine-op-inspct | Unmarried | White | Female | 0.0 | 0.0 | 20.0 | United-States | <=50K |
| 20 | 40.0 | Private | 85019.0 | Other-Grad | 16.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 45.0 | Non-US | >50K |
| 21 | 34.0 | Private | 107914.0 | Other-Grad | 13.0 | Married-civ-spouse | Tech-support | Husband | White | Male | 0.0 | 0.0 | 47.0 | United-States | >50K |
| 22 | 34.0 | Private | 238588.0 | Some-college | 10.0 | Never-married | Other-service | Own-child | Black | Female | 0.0 | 0.0 | 35.0 | United-States | <=50K |
| 23 | 72.0 | ? | 132015.0 | 7th-8th | 4.0 | Divorced | ? | Not-in-family | White | Female | 0.0 | 0.0 | 6.0 | United-States | <=50K |
| 24 | 25.0 | Private | 220931.0 | Other-Grad | 13.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 0.0 | 0.0 | 43.0 | Non-US | <=50K |
| 25 | 25.0 | Private | 205947.0 | Other-Grad | 13.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 26 | 45.0 | Self-emp-not-inc | 432824.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 7298.0 | 0.0 | 90.0 | United-States | >50K |
| 27 | 22.0 | Private | 236427.0 | HS-grad | 9.0 | Never-married | Adm-clerical | Own-child | White | Male | 0.0 | 0.0 | 20.0 | United-States | <=50K |
| 28 | 23.0 | Private | 134446.0 | HS-grad | 9.0 | Separated | Machine-op-inspct | Unmarried | Black | Male | 0.0 | 0.0 | 54.0 | United-States | <=50K |
| 29 | 54.0 | Private | 99516.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 35.0 | United-States | <=50K |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 48813 | 30.0 | ? | 33811.0 | Other-Grad | 13.0 | Never-married | ? | Not-in-family | Asian-Pac-Islander | Female | 0.0 | 0.0 | 99.0 | United-States | <=50K |
| 48814 | 34.0 | Private | 204461.0 | Other-Grad | 16.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0.0 | 0.0 | 60.0 | United-States | >50K |
| 48815 | 54.0 | Private | 337992.0 | Other-Grad | 13.0 | Married-civ-spouse | Exec-managerial | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 50.0 | Non-US | >50K |
| 48816 | 37.0 | Private | 179137.0 | Some-college | 10.0 | Divorced | Adm-clerical | Unmarried | White | Female | 0.0 | 0.0 | 39.0 | United-States | <=50K |
| 48817 | 22.0 | Private | 325033.0 | 12th | 8.0 | Never-married | Protective-serv | Own-child | Black | Male | 0.0 | 0.0 | 35.0 | United-States | <=50K |
| 48818 | 34.0 | Private | 160216.0 | Other-Grad | 13.0 | Never-married | Exec-managerial | Not-in-family | White | Female | 0.0 | 0.0 | 55.0 | United-States | >50K |
| 48819 | 30.0 | Private | 345898.0 | HS-grad | 9.0 | Never-married | Craft-repair | Not-in-family | Black | Male | 0.0 | 0.0 | 46.0 | United-States | <=50K |
| 48820 | 38.0 | Private | 139180.0 | Other-Grad | 13.0 | Divorced | Prof-specialty | Unmarried | Black | Female | 15020.0 | 0.0 | 45.0 | United-States | >50K |
| 48821 | 71.0 | ? | 287372.0 | Other-Grad | 16.0 | Married-civ-spouse | ? | Husband | White | Male | 0.0 | 0.0 | 10.0 | United-States | >50K |
| 48822 | 45.0 | State-gov | 252208.0 | HS-grad | 9.0 | Separated | Adm-clerical | Own-child | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 48823 | 41.0 | ? | 202822.0 | HS-grad | 9.0 | Separated | ? | Not-in-family | Black | Female | 0.0 | 0.0 | 32.0 | United-States | <=50K |
| 48824 | 72.0 | ? | 129912.0 | HS-grad | 9.0 | Married-civ-spouse | ? | Husband | White | Male | 0.0 | 0.0 | 25.0 | United-States | <=50K |
| 48825 | 45.0 | Local-gov | 119199.0 | Other-Grad | 12.0 | Divorced | Prof-specialty | Unmarried | White | Female | 0.0 | 0.0 | 48.0 | United-States | <=50K |
| 48826 | 31.0 | Private | 199655.0 | Other-Grad | 14.0 | Divorced | Other-service | Not-in-family | Other | Female | 0.0 | 0.0 | 30.0 | United-States | <=50K |
| 48827 | 39.0 | Local-gov | 111499.0 | Other-Grad | 12.0 | Married-civ-spouse | Adm-clerical | Wife | White | Female | 0.0 | 0.0 | 20.0 | United-States | >50K |
| 48828 | 37.0 | Private | 198216.0 | Other-Grad | 12.0 | Divorced | Tech-support | Not-in-family | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 48829 | 43.0 | Private | 260761.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Non-US | <=50K |
| 48830 | 65.0 | Self-emp-not-inc | 99359.0 | Other-Grad | 15.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 1086.0 | 0.0 | 60.0 | United-States | <=50K |
| 48831 | 43.0 | State-gov | 255835.0 | Some-college | 10.0 | Divorced | Adm-clerical | Other-relative | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 48832 | 43.0 | Self-emp-not-inc | 27242.0 | Some-college | 10.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
| 48833 | 32.0 | Private | 34066.0 | 10th | 6.0 | Married-civ-spouse | Handlers-cleaners | Husband | Amer-Indian-Eskimo | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 48834 | 43.0 | Private | 84661.0 | Other-Grad | 11.0 | Married-civ-spouse | Sales | Husband | White | Male | 0.0 | 0.0 | 45.0 | United-States | <=50K |
| 48835 | 32.0 | Private | 116138.0 | Other-Grad | 14.0 | Never-married | Tech-support | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 11.0 | Non-US | <=50K |
| 48836 | 53.0 | Private | 321865.0 | Other-Grad | 14.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
| 48837 | 22.0 | Private | 310152.0 | Some-college | 10.0 | Never-married | Protective-serv | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 48838 | 27.0 | Private | 257302.0 | Other-Grad | 12.0 | Married-civ-spouse | Tech-support | Wife | White | Female | 0.0 | 0.0 | 38.0 | United-States | <=50K |
| 48839 | 40.0 | Private | 154374.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
| 48840 | 58.0 | Private | 151910.0 | HS-grad | 9.0 | Widowed | Adm-clerical | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
| 48841 | 22.0 | Private | 201490.0 | HS-grad | 9.0 | Never-married | Adm-clerical | Own-child | White | Male | 0.0 | 0.0 | 20.0 | United-States | <=50K |
| 48842 | 52.0 | Self-emp-inc | 287927.0 | HS-grad | 9.0 | Married-civ-spouse | Exec-managerial | Wife | White | Female | 15024.0 | 0.0 | 40.0 | United-States | >50K |
48843 rows × 15 columns
# Select the rows of the un-aggregated data whose native-country is not 'United-States'.
# NOTE(review): rows with an unknown country ('?') are part of this selection too.
born_abroad = adult['native-country'] != 'United-States'
adult_immigrants = adult.loc[born_abroad]
adult_immigrants
| age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 20 | 40.0 | Private | 85019.0 | Doctorate | 16.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 45.0 | ? | >50K |
| 24 | 25.0 | Private | 220931.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 0.0 | 0.0 | 43.0 | Peru | <=50K |
| 38 | 22.0 | Private | 248446.0 | 5th-6th | 3.0 | Never-married | Priv-house-serv | Not-in-family | White | Male | 0.0 | 0.0 | 50.0 | Guatemala | <=50K |
| 47 | 39.0 | Private | 290208.0 | 7th-8th | 4.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| 55 | 38.0 | Private | 219446.0 | 9th | 5.0 | Married-spouse-absent | Exec-managerial | Not-in-family | White | Male | 0.0 | 0.0 | 54.0 | Mexico | <=50K |
| 66 | 41.0 | Private | 109912.0 | Bachelors | 13.0 | Never-married | Other-service | Not-in-family | White | Female | 0.0 | 0.0 | 40.0 | ? | <=50K |
| 73 | 30.0 | Private | 229636.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| 75 | 46.0 | Private | 269034.0 | Some-college | 10.0 | Married-civ-spouse | Craft-repair | Husband | Other | Male | 0.0 | 0.0 | 40.0 | Dominican-Republic | <=50K |
| 84 | 44.0 | Self-emp-inc | 223881.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 99999.0 | 0.0 | 50.0 | ? | >50K |
| 129 | 27.0 | Self-emp-not-inc | 115438.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 60.0 | Ireland | >50K |
| 142 | 18.0 | Private | 262118.0 | Some-college | 10.0 | Never-married | Adm-clerical | Own-child | Asian-Pac-Islander | Female | 0.0 | 0.0 | 22.0 | Germany | <=50K |
| 189 | 34.0 | State-gov | 513100.0 | Bachelors | 13.0 | Married-spouse-absent | Farming-fishing | Not-in-family | Black | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
| 218 | 21.0 | Private | 447488.0 | 5th-6th | 3.0 | Never-married | Machine-op-inspct | Other-relative | White | Male | 0.0 | 0.0 | 38.0 | Mexico | <=50K |
| 221 | 34.0 | Private | 162312.0 | Bachelors | 13.0 | Married-civ-spouse | Adm-clerical | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 40.0 | Philippines | <=50K |
| 222 | 25.0 | Private | 77698.0 | HS-grad | 9.0 | Never-married | Machine-op-inspct | Not-in-family | Asian-Pac-Islander | Female | 0.0 | 0.0 | 40.0 | Philippines | <=50K |
| 233 | 55.0 | Private | 119751.0 | Masters | 14.0 | Never-married | Exec-managerial | Unmarried | Asian-Pac-Islander | Female | 0.0 | 0.0 | 50.0 | Thailand | <=50K |
| 238 | 42.0 | Private | 227968.0 | HS-grad | 9.0 | Never-married | Other-service | Unmarried | Black | Female | 0.0 | 0.0 | 28.0 | Haiti | <=50K |
| 254 | 42.0 | Federal-gov | 177937.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
| 297 | 30.0 | Private | 236543.0 | 9th | 5.0 | Married-civ-spouse | Other-service | Husband | White | Male | 0.0 | 0.0 | 32.0 | El-Salvador | >50K |
| 302 | 30.0 | Private | 169269.0 | 11th | 7.0 | Never-married | Handlers-cleaners | Other-relative | White | Male | 0.0 | 1721.0 | 38.0 | Puerto-Rico | <=50K |
| 306 | 37.0 | Local-gov | 263690.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Unmarried | Black | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
| 310 | 51.0 | Self-emp-not-inc | 136708.0 | HS-grad | 9.0 | Married-civ-spouse | Sales | Husband | Asian-Pac-Islander | Male | 3103.0 | 0.0 | 84.0 | Vietnam | <=50K |
| 324 | 52.0 | Private | 55608.0 | 1st-4th | 2.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| 325 | 26.0 | Private | 248057.0 | HS-grad | 9.0 | Separated | Handlers-cleaners | Own-child | White | Male | 0.0 | 0.0 | 40.0 | Puerto-Rico | <=50K |
| 330 | 64.0 | Self-emp-not-inc | 71807.0 | Doctorate | 16.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 15024.0 | 0.0 | 50.0 | ? | >50K |
| 337 | 44.0 | Private | 196234.0 | 9th | 5.0 | Divorced | Other-service | Unmarried | White | Female | 0.0 | 0.0 | 55.0 | Dominican-Republic | <=50K |
| 343 | 31.0 | Private | 179415.0 | 5th-6th | 3.0 | Married-civ-spouse | Other-service | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| 351 | 45.0 | ? | 319993.0 | HS-grad | 9.0 | Widowed | ? | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| 370 | 26.0 | Private | 190873.0 | 10th | 6.0 | Divorced | Other-service | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | Germany | <=50K |
| 372 | 41.0 | Private | 203217.0 | 7th-8th | 4.0 | Separated | Craft-repair | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 48594 | 34.0 | Private | 346034.0 | 12th | 8.0 | Married-spouse-absent | Handlers-cleaners | Unmarried | White | Male | 0.0 | 0.0 | 35.0 | Mexico | <=50K |
| 48595 | 41.0 | Private | 144460.0 | Some-college | 10.0 | Divorced | Machine-op-inspct | Own-child | White | Male | 0.0 | 0.0 | 40.0 | Italy | <=50K |
| 48602 | 37.0 | Private | 328466.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 72.0 | Mexico | <=50K |
| 48609 | 28.0 | Private | 330466.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 40.0 | Hong | <=50K |
| 48614 | 26.0 | Private | 233777.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 72.0 | Mexico | <=50K |
| 48616 | 24.0 | Private | 176580.0 | 5th-6th | 3.0 | Married-spouse-absent | Farming-fishing | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| 48619 | 23.0 | Private | 194951.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 55.0 | Ireland | <=50K |
| 48623 | 74.0 | Self-emp-not-inc | 199136.0 | Bachelors | 13.0 | Widowed | Craft-repair | Not-in-family | White | Male | 15831.0 | 0.0 | 8.0 | Germany | >50K |
| 48634 | 28.0 | Private | 119793.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 40.0 | Portugal | <=50K |
| 48641 | 46.0 | Private | 139514.0 | Preschool | 1.0 | Married-civ-spouse | Machine-op-inspct | Other-relative | Black | Male | 0.0 | 0.0 | 75.0 | Dominican-Republic | <=50K |
| 48652 | 53.0 | Self-emp-not-inc | 137547.0 | Prof-school | 15.0 | Never-married | Prof-specialty | Not-in-family | Asian-Pac-Islander | Male | 27828.0 | 0.0 | 40.0 | Philippines | >50K |
| 48653 | 49.0 | Self-emp-not-inc | 111959.0 | Bachelors | 13.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 60.0 | Scotland | >50K |
| 48658 | 40.0 | Private | 306225.0 | HS-grad | 9.0 | Divorced | Craft-repair | Not-in-family | Asian-Pac-Islander | Female | 0.0 | 0.0 | 40.0 | Japan | <=50K |
| 48660 | 39.0 | Private | 214896.0 | HS-grad | 9.0 | Separated | Other-service | Not-in-family | White | Female | 0.0 | 0.0 | 40.0 | El-Salvador | <=50K |
| 48685 | 48.0 | Private | 325372.0 | 1st-4th | 2.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Portugal | <=50K |
| 48695 | 45.0 | Private | 199590.0 | 5th-6th | 3.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
| 48707 | 23.0 | Private | 180771.0 | 1st-4th | 2.0 | Married-civ-spouse | Machine-op-inspct | Wife | Amer-Indian-Eskimo | Female | 0.0 | 0.0 | 35.0 | Mexico | <=50K |
| 48710 | 39.0 | Federal-gov | 110622.0 | Bachelors | 13.0 | Married-civ-spouse | Adm-clerical | Wife | Asian-Pac-Islander | Female | 0.0 | 0.0 | 40.0 | Philippines | <=50K |
| 48714 | 36.0 | Private | 208068.0 | Preschool | 1.0 | Divorced | Other-service | Not-in-family | Other | Male | 0.0 | 0.0 | 72.0 | Mexico | <=50K |
| 48731 | 44.0 | Self-emp-inc | 71556.0 | Masters | 14.0 | Married-civ-spouse | Sales | Husband | White | Male | 0.0 | 0.0 | 50.0 | ? | >50K |
| 48738 | 30.0 | Self-emp-not-inc | 261943.0 | 11th | 7.0 | Married-spouse-absent | Craft-repair | Not-in-family | White | Male | 0.0 | 0.0 | 30.0 | Honduras | <=50K |
| 48741 | 85.0 | Private | 98611.0 | Bachelors | 13.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 3.0 | Poland | <=50K |
| 48751 | 58.0 | Self-emp-inc | 181974.0 | Doctorate | 16.0 | Never-married | Prof-specialty | Not-in-family | White | Female | 0.0 | 0.0 | 99.0 | ? | <=50K |
| 48774 | 42.0 | Self-emp-not-inc | 217597.0 | HS-grad | 9.0 | Divorced | Sales | Own-child | White | Male | 0.0 | 0.0 | 50.0 | ? | <=50K |
| 48790 | 45.0 | Private | 155093.0 | 10th | 6.0 | Divorced | Other-service | Not-in-family | Black | Female | 0.0 | 0.0 | 38.0 | Dominican-Republic | <=50K |
| 48792 | 39.0 | Private | 107302.0 | HS-grad | 9.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0.0 | 0.0 | 45.0 | ? | >50K |
| 48807 | 81.0 | ? | 120478.0 | Assoc-voc | 11.0 | Divorced | ? | Unmarried | White | Female | 0.0 | 0.0 | 1.0 | ? | <=50K |
| 48815 | 54.0 | Private | 337992.0 | Bachelors | 13.0 | Married-civ-spouse | Exec-managerial | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 50.0 | Japan | >50K |
| 48829 | 43.0 | Private | 260761.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
| 48835 | 32.0 | Private | 116138.0 | Masters | 14.0 | Never-married | Tech-support | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 11.0 | Taiwan | <=50K |
5010 rows × 15 columns