import pandas as pd
# Load the shopping-basket sheet: one row per basket, one 0/1 column per product.
shopping = pd.read_excel('ShoppingBaskets.xls')
# Drop BasketNo so only the product columns remain
# (presumably it is just a basket identifier - confirm against the sheet).
shopping_data = shopping.drop('BasketNo', axis=1)
# Some spreadsheet headers carry trailing blanks (e.g. 'ThinkPad X220 ').
# Strip them so labels derived from the column names are clean; non-string
# labels are passed through untouched.
shopping_data = shopping_data.rename(
    columns=lambda label: label.strip() if isinstance(label, str) else label
)
shopping_data.head()
ThinkPad X220 | Asus EeePC | HP Laserjet P2055 | 2 GB DDR3 RAM | 8 GB DDR3 RAM | Lenovo Tablet Sleeve | Netbook-Schutzhülle | HP CE50 Toner | LT Laser Maus | LT Minimaus | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 |
1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
2 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 |
3 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 |
4 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 |
from orangecontrib.associate.fpgrowth import *
# Mine the frequent itemsets with a minimum support of 0.20. The miner is fed
# the raw matrix behind the DataFrame, hence .values.
itemsets = dict(frequent_itemsets(shopping_data.values, 0.20))  # take care to use .values

# Build one table row per itemset: how many items it holds, the support value
# reported by the miner, and the product names obtained by looking up each
# column position stored in the itemset.
product_names = shopping_data.columns
rows = [
    (len(members), count, [product_names[position] for position in members])
    for members, count in itemsets.items()
]
item_set_table = pd.DataFrame(rows, columns=["size", "support", "items"])
# Show the most frequently bought combinations first.
item_set_table.sort_values('support', ascending=False)
size | support | items | |
---|---|---|---|
1 | 1 | 6 | [Asus EeePC] |
4 | 1 | 5 | [2 GB DDR3 RAM] |
5 | 2 | 5 | [Asus EeePC, 2 GB DDR3 RAM] |
33 | 1 | 5 | [LT Minimaus] |
34 | 2 | 4 | [Asus EeePC, LT Minimaus] |
24 | 1 | 4 | [LT Laser Maus] |
15 | 3 | 4 | [Asus EeePC, 2 GB DDR3 RAM, Netbook-Schutzhülle ] |
14 | 2 | 4 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] |
13 | 2 | 4 | [Asus EeePC, Netbook-Schutzhülle ] |
12 | 1 | 4 | [Netbook-Schutzhülle ] |
0 | 1 | 4 | [ThinkPad X220 ] |
8 | 1 | 4 | [Lenovo Tablet Sleeve] |
18 | 2 | 3 | [HP Laserjet P2055, HP CE50 Toner] |
36 | 3 | 3 | [Asus EeePC, 2 GB DDR3 RAM, LT Minimaus] |
38 | 2 | 3 | [LT Minimaus, Netbook-Schutzhülle ] |
39 | 3 | 3 | [Asus EeePC, Netbook-Schutzhülle , LT Minimaus] |
40 | 3 | 3 | [LT Minimaus, 2 GB DDR3 RAM, Netbook-Schutzhül... |
35 | 2 | 3 | [LT Minimaus, 2 GB DDR3 RAM] |
41 | 4 | 3 | [Asus EeePC, 2 GB DDR3 RAM, Netbook-Schutzhüll... |
2 | 1 | 3 | [HP Laserjet P2055] |
16 | 1 | 3 | [HP CE50 Toner] |
9 | 2 | 3 | [ThinkPad X220 , Lenovo Tablet Sleeve] |
19 | 3 | 2 | [ThinkPad X220 , HP Laserjet P2055, HP CE50 To... |
31 | 3 | 2 | [LT Laser Maus, 2 GB DDR3 RAM, Netbook-Schutzh... |
3 | 2 | 2 | [ThinkPad X220 , HP Laserjet P2055] |
6 | 1 | 2 | [8 GB DDR3 RAM] |
37 | 2 | 2 | [LT Minimaus, Lenovo Tablet Sleeve] |
7 | 2 | 2 | [ThinkPad X220 , 8 GB DDR3 RAM] |
10 | 2 | 2 | [HP Laserjet P2055, Lenovo Tablet Sleeve] |
11 | 3 | 2 | [ThinkPad X220 , HP Laserjet P2055, Lenovo Tab... |
32 | 4 | 2 | [LT Laser Maus, Asus EeePC, 2 GB DDR3 RAM, Net... |
30 | 3 | 2 | [LT Laser Maus, Asus EeePC, Netbook-Schutzhülle ] |
20 | 2 | 2 | [Lenovo Tablet Sleeve, HP CE50 Toner] |
29 | 2 | 2 | [LT Laser Maus, Netbook-Schutzhülle ] |
28 | 3 | 2 | [LT Laser Maus, Asus EeePC, 2 GB DDR3 RAM] |
27 | 2 | 2 | [LT Laser Maus, 2 GB DDR3 RAM] |
26 | 2 | 2 | [LT Laser Maus, Asus EeePC] |
25 | 2 | 2 | [ThinkPad X220 , LT Laser Maus] |
23 | 4 | 2 | [ThinkPad X220 , HP Laserjet P2055, Lenovo Tab... |
22 | 3 | 2 | [HP Laserjet P2055, Lenovo Tablet Sleeve, HP C... |
17 | 2 | 2 | [ThinkPad X220 , HP CE50 Toner] |
21 | 3 | 2 | [ThinkPad X220 , Lenovo Tablet Sleeve, HP CE50... |
# Derive association rules from the frequent itemsets, passing 0.70 as the
# confidence threshold.
rules = association_rules(itemsets, 0.70)

# rules_stats yields (premise, conclusion, support, confidence, coverage,
# strength, lift, leverage); premise and conclusion hold column positions,
# which are translated to product names, and the six metrics are kept as-is.
column_labels = shopping_data.columns
rows = [
    (
        [column_labels[position] for position in antecedent],
        [column_labels[position] for position in consequent],
        *metrics,
    )
    for antecedent, consequent, *metrics in rules_stats(rules, itemsets, len(shopping_data))
]
pd.DataFrame(
    rows,
    columns=['Premis', 'Conclusion', 'Support', 'Confidence', 'Coverage', 'Strength', 'Lift', 'Leverage'],
)
Premis | Conclusion | Support | Confidence | Coverage | Strength | Lift | Leverage | |
---|---|---|---|---|---|---|---|---|
0 | [HP Laserjet P2055, Lenovo Tablet Sleeve, HP C... | [ThinkPad X220 ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
1 | [ThinkPad X220 , Lenovo Tablet Sleeve, HP CE50... | [HP Laserjet P2055] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
2 | [Lenovo Tablet Sleeve, HP CE50 Toner] | [ThinkPad X220 , HP Laserjet P2055] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
3 | [ThinkPad X220 , HP Laserjet P2055, HP CE50 To... | [Lenovo Tablet Sleeve] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
4 | [ThinkPad X220 , HP CE50 Toner] | [HP Laserjet P2055, Lenovo Tablet Sleeve] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
5 | [ThinkPad X220 , HP Laserjet P2055, Lenovo Tab... | [HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
6 | [HP Laserjet P2055, Lenovo Tablet Sleeve] | [ThinkPad X220 , HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
7 | [ThinkPad X220 , HP Laserjet P2055] | [Lenovo Tablet Sleeve, HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.000000 | 5.000000 | 0.16 |
8 | [LT Laser Maus, 2 GB DDR3 RAM, Netbook-Schutzh... | [Asus EeePC] | 2 | 1.000000 | 0.2 | 3.000000 | 1.666667 | 0.08 |
9 | [LT Laser Maus, Asus EeePC, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
10 | [LT Laser Maus, Netbook-Schutzhülle ] | [Asus EeePC, 2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
11 | [LT Laser Maus, Asus EeePC, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
12 | [LT Laser Maus, 2 GB DDR3 RAM] | [Asus EeePC, Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
13 | [LT Laser Maus, Asus EeePC] | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
14 | [LT Minimaus, 2 GB DDR3 RAM, Netbook-Schutzhül... | [Asus EeePC] | 3 | 1.000000 | 0.3 | 2.000000 | 1.666667 | 0.12 |
15 | [Asus EeePC, Netbook-Schutzhülle , LT Minimaus] | [2 GB DDR3 RAM] | 3 | 1.000000 | 0.3 | 1.666667 | 2.000000 | 0.15 |
16 | [LT Minimaus, Netbook-Schutzhülle ] | [Asus EeePC, 2 GB DDR3 RAM] | 3 | 1.000000 | 0.3 | 1.666667 | 2.000000 | 0.15 |
17 | [Asus EeePC, 2 GB DDR3 RAM, LT Minimaus] | [Netbook-Schutzhülle ] | 3 | 1.000000 | 0.3 | 1.333333 | 2.500000 | 0.18 |
18 | [LT Minimaus, 2 GB DDR3 RAM] | [Asus EeePC, Netbook-Schutzhülle ] | 3 | 1.000000 | 0.3 | 1.333333 | 2.500000 | 0.18 |
19 | [Asus EeePC, LT Minimaus] | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
20 | [Asus EeePC, 2 GB DDR3 RAM, Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
21 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | [LT Minimaus, Asus EeePC] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
22 | [Asus EeePC, Netbook-Schutzhülle ] | [LT Minimaus, 2 GB DDR3 RAM] | 3 | 0.750000 | 0.4 | 0.750000 | 2.500000 | 0.18 |
23 | [Netbook-Schutzhülle ] | [LT Minimaus, 2 GB DDR3 RAM, Asus EeePC] | 3 | 0.750000 | 0.4 | 0.750000 | 2.500000 | 0.18 |
24 | [HP Laserjet P2055, Lenovo Tablet Sleeve] | [ThinkPad X220 ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
25 | [ThinkPad X220 , HP Laserjet P2055] | [Lenovo Tablet Sleeve] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
26 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | [Asus EeePC] | 4 | 1.000000 | 0.4 | 1.500000 | 1.666667 | 0.16 |
27 | [Asus EeePC, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 4 | 1.000000 | 0.4 | 1.250000 | 2.000000 | 0.20 |
28 | [Netbook-Schutzhülle ] | [Asus EeePC, 2 GB DDR3 RAM] | 4 | 1.000000 | 0.4 | 1.250000 | 2.000000 | 0.20 |
29 | [Asus EeePC, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 4 | 0.800000 | 0.5 | 0.800000 | 2.000000 | 0.20 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
35 | [Lenovo Tablet Sleeve, HP CE50 Toner] | [HP Laserjet P2055] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
36 | [HP Laserjet P2055, Lenovo Tablet Sleeve] | [HP CE50 Toner] | 2 | 1.000000 | 0.2 | 1.500000 | 3.333333 | 0.14 |
37 | [LT Laser Maus, 2 GB DDR3 RAM] | [Asus EeePC] | 2 | 1.000000 | 0.2 | 3.000000 | 1.666667 | 0.08 |
38 | [LT Laser Maus, Asus EeePC] | [2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
39 | [LT Laser Maus, Netbook-Schutzhülle ] | [Asus EeePC] | 2 | 1.000000 | 0.2 | 3.000000 | 1.666667 | 0.08 |
40 | [LT Laser Maus, Asus EeePC] | [Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
41 | [LT Laser Maus, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 2 | 1.000000 | 0.2 | 2.500000 | 2.000000 | 0.10 |
42 | [LT Laser Maus, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
43 | [LT Minimaus, 2 GB DDR3 RAM] | [Asus EeePC] | 3 | 1.000000 | 0.3 | 2.000000 | 1.666667 | 0.12 |
44 | [Asus EeePC, LT Minimaus] | [2 GB DDR3 RAM] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
45 | [LT Minimaus, Netbook-Schutzhülle ] | [Asus EeePC] | 3 | 1.000000 | 0.3 | 2.000000 | 1.666667 | 0.12 |
46 | [Asus EeePC, LT Minimaus] | [Netbook-Schutzhülle ] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
47 | [Asus EeePC, Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
48 | [Netbook-Schutzhülle ] | [LT Minimaus, Asus EeePC] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
49 | [2 GB DDR3 RAM, Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
50 | [Netbook-Schutzhülle ] | [LT Minimaus, 2 GB DDR3 RAM] | 3 | 0.750000 | 0.4 | 0.750000 | 2.500000 | 0.18 |
51 | [LT Minimaus, Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 3 | 1.000000 | 0.3 | 1.666667 | 2.000000 | 0.15 |
52 | [LT Minimaus, 2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 3 | 1.000000 | 0.3 | 1.333333 | 2.500000 | 0.18 |
53 | [2 GB DDR3 RAM] | [Asus EeePC] | 5 | 1.000000 | 0.5 | 1.200000 | 1.666667 | 0.20 |
54 | [Asus EeePC] | [2 GB DDR3 RAM] | 5 | 0.833333 | 0.6 | 0.833333 | 1.666667 | 0.20 |
55 | [8 GB DDR3 RAM] | [ThinkPad X220 ] | 2 | 1.000000 | 0.2 | 2.000000 | 2.500000 | 0.12 |
56 | [Lenovo Tablet Sleeve] | [ThinkPad X220 ] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
57 | [ThinkPad X220 ] | [Lenovo Tablet Sleeve] | 3 | 0.750000 | 0.4 | 1.000000 | 1.875000 | 0.14 |
58 | [Netbook-Schutzhülle ] | [Asus EeePC] | 4 | 1.000000 | 0.4 | 1.500000 | 1.666667 | 0.16 |
59 | [Netbook-Schutzhülle ] | [2 GB DDR3 RAM] | 4 | 1.000000 | 0.4 | 1.250000 | 2.000000 | 0.20 |
60 | [2 GB DDR3 RAM] | [Netbook-Schutzhülle ] | 4 | 0.800000 | 0.5 | 0.800000 | 2.000000 | 0.20 |
61 | [HP CE50 Toner] | [HP Laserjet P2055] | 3 | 1.000000 | 0.3 | 1.000000 | 3.333333 | 0.21 |
62 | [HP Laserjet P2055] | [HP CE50 Toner] | 3 | 1.000000 | 0.3 | 1.000000 | 3.333333 | 0.21 |
63 | [LT Minimaus] | [Asus EeePC] | 4 | 0.800000 | 0.5 | 1.200000 | 1.333333 | 0.10 |
64 | [Netbook-Schutzhülle ] | [LT Minimaus] | 3 | 0.750000 | 0.4 | 1.250000 | 1.500000 | 0.10 |
65 rows × 8 columns
from scipy.io import arff
# Parse the ARFF file into a record array plus its metadata. The file is
# opened in a `with` block so the handle is closed as soon as parsing is done
# instead of being left open for the rest of the session.
with open('adult-dataset-tweaked.arff', 'r') as arff_file:
    adult_arff_data, adult_arff_meta = arff.loadarff(arff_file)
adult = pd.DataFrame(adult_arff_data)
# Turn every value that exposes .decode (the byte strings produced by the
# loader) into a regular str and leave all other values untouched. This is
# done column by column with Series.map because DataFrame.applymap is
# deprecated in recent pandas releases, while Series.map works everywhere.
adult = adult.apply(
    lambda column: column.map(
        lambda x: x.decode('utf8') if hasattr(x, 'decode') else x
    )
)
adult.head()
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | X | 0.0 | 0.0 | 40.0 | United-States | X |
1 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
2 | 38.0 | Private | 89814.0 | HS-grad | 9.0 | Married-civ-spouse | Farming-fishing | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
3 | 28.0 | Local-gov | 336951.0 | Assoc-acdm | 12.0 | Married-civ-spouse | Protective-serv | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
4 | 44.0 | Private | 160323.0 | Some-college | 10.0 | Married-civ-spouse | Machine-op-inspct | Husband | Black | Male | 7688.0 | 0.0 | 40.0 | United-States | >50K |
# Work on a copy so the original `adult` frame keeps its detailed categories.
adult_aggregate = adult.copy()
# Merge all post-secondary degrees into one 'Other-Grad' category.
# The result is assigned back to the column explicitly: calling
# .replace(..., inplace=True) on the selection adult_aggregate['education']
# is a chained in-place update, which pandas does not guarantee to propagate
# to the parent frame (it is a no-op under Copy-on-Write).
adult_aggregate['education'] = adult_aggregate['education'].replace(
    ['Bachelors', 'Masters', 'Assoc-acdm', 'Prof-school', 'Assoc-voc', 'Doctorate'],
    'Other-Grad',
)
adult_aggregate.head()
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | X | 0.0 | 0.0 | 40.0 | United-States | X |
1 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
2 | 38.0 | Private | 89814.0 | HS-grad | 9.0 | Married-civ-spouse | Farming-fishing | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
3 | 28.0 | Local-gov | 336951.0 | Other-Grad | 12.0 | Married-civ-spouse | Protective-serv | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
4 | 44.0 | Private | 160323.0 | Some-college | 10.0 | Married-civ-spouse | Machine-op-inspct | Husband | Black | Male | 7688.0 | 0.0 | 40.0 | United-States | >50K |
# Collapse every native-country value other than 'United-States' into the
# single category 'Non-US'. The write goes through one .loc[mask, column]
# assignment (not chained indexing) so it lands on adult_aggregate itself.
# NOTE(review): rows whose country is the unknown marker '?' are relabelled
# 'Non-US' as well - confirm that this is intended.
outside_us = adult_aggregate['native-country'].ne('United-States')
adult_aggregate.loc[outside_us, 'native-country'] = 'Non-US'
adult_aggregate
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | X | 0.0 | 0.0 | 40.0 | United-States | X |
1 | 25.0 | Private | 226802.0 | 11th | 7.0 | Never-married | Machine-op-inspct | Own-child | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
2 | 38.0 | Private | 89814.0 | HS-grad | 9.0 | Married-civ-spouse | Farming-fishing | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
3 | 28.0 | Local-gov | 336951.0 | Other-Grad | 12.0 | Married-civ-spouse | Protective-serv | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
4 | 44.0 | Private | 160323.0 | Some-college | 10.0 | Married-civ-spouse | Machine-op-inspct | Husband | Black | Male | 7688.0 | 0.0 | 40.0 | United-States | >50K |
5 | 18.0 | ? | 103497.0 | Some-college | 10.0 | Never-married | ? | Own-child | White | Female | 0.0 | 0.0 | 30.0 | United-States | <=50K |
6 | 34.0 | Private | 198693.0 | 10th | 6.0 | Never-married | Other-service | Not-in-family | White | Male | 0.0 | 0.0 | 30.0 | United-States | <=50K |
7 | 29.0 | ? | 227026.0 | HS-grad | 9.0 | Never-married | ? | Unmarried | Black | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
8 | 63.0 | Self-emp-not-inc | 104626.0 | Other-Grad | 15.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 3103.0 | 0.0 | 32.0 | United-States | >50K |
9 | 24.0 | Private | 369667.0 | Some-college | 10.0 | Never-married | Other-service | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
10 | 55.0 | Private | 104996.0 | 7th-8th | 4.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 10.0 | United-States | <=50K |
11 | 65.0 | Private | 184454.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 6418.0 | 0.0 | 40.0 | United-States | >50K |
12 | 36.0 | Federal-gov | 212465.0 | Other-Grad | 13.0 | Married-civ-spouse | Adm-clerical | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
13 | 26.0 | Private | 82091.0 | HS-grad | 9.0 | Never-married | Adm-clerical | Not-in-family | White | Female | 0.0 | 0.0 | 39.0 | United-States | <=50K |
14 | 58.0 | ? | 299831.0 | HS-grad | 9.0 | Married-civ-spouse | ? | Husband | White | Male | 0.0 | 0.0 | 35.0 | United-States | <=50K |
15 | 48.0 | Private | 279724.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 3103.0 | 0.0 | 48.0 | United-States | >50K |
16 | 43.0 | Private | 346189.0 | Other-Grad | 14.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | >50K |
17 | 20.0 | State-gov | 444554.0 | Some-college | 10.0 | Never-married | Other-service | Own-child | White | Male | 0.0 | 0.0 | 25.0 | United-States | <=50K |
18 | 43.0 | Private | 128354.0 | HS-grad | 9.0 | Married-civ-spouse | Adm-clerical | Wife | White | Female | 0.0 | 0.0 | 30.0 | United-States | <=50K |
19 | 37.0 | Private | 60548.0 | HS-grad | 9.0 | Widowed | Machine-op-inspct | Unmarried | White | Female | 0.0 | 0.0 | 20.0 | United-States | <=50K |
20 | 40.0 | Private | 85019.0 | Other-Grad | 16.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 45.0 | Non-US | >50K |
21 | 34.0 | Private | 107914.0 | Other-Grad | 13.0 | Married-civ-spouse | Tech-support | Husband | White | Male | 0.0 | 0.0 | 47.0 | United-States | >50K |
22 | 34.0 | Private | 238588.0 | Some-college | 10.0 | Never-married | Other-service | Own-child | Black | Female | 0.0 | 0.0 | 35.0 | United-States | <=50K |
23 | 72.0 | ? | 132015.0 | 7th-8th | 4.0 | Divorced | ? | Not-in-family | White | Female | 0.0 | 0.0 | 6.0 | United-States | <=50K |
24 | 25.0 | Private | 220931.0 | Other-Grad | 13.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 0.0 | 0.0 | 43.0 | Non-US | <=50K |
25 | 25.0 | Private | 205947.0 | Other-Grad | 13.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
26 | 45.0 | Self-emp-not-inc | 432824.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 7298.0 | 0.0 | 90.0 | United-States | >50K |
27 | 22.0 | Private | 236427.0 | HS-grad | 9.0 | Never-married | Adm-clerical | Own-child | White | Male | 0.0 | 0.0 | 20.0 | United-States | <=50K |
28 | 23.0 | Private | 134446.0 | HS-grad | 9.0 | Separated | Machine-op-inspct | Unmarried | Black | Male | 0.0 | 0.0 | 54.0 | United-States | <=50K |
29 | 54.0 | Private | 99516.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 35.0 | United-States | <=50K |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
48813 | 30.0 | ? | 33811.0 | Other-Grad | 13.0 | Never-married | ? | Not-in-family | Asian-Pac-Islander | Female | 0.0 | 0.0 | 99.0 | United-States | <=50K |
48814 | 34.0 | Private | 204461.0 | Other-Grad | 16.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0.0 | 0.0 | 60.0 | United-States | >50K |
48815 | 54.0 | Private | 337992.0 | Other-Grad | 13.0 | Married-civ-spouse | Exec-managerial | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 50.0 | Non-US | >50K |
48816 | 37.0 | Private | 179137.0 | Some-college | 10.0 | Divorced | Adm-clerical | Unmarried | White | Female | 0.0 | 0.0 | 39.0 | United-States | <=50K |
48817 | 22.0 | Private | 325033.0 | 12th | 8.0 | Never-married | Protective-serv | Own-child | Black | Male | 0.0 | 0.0 | 35.0 | United-States | <=50K |
48818 | 34.0 | Private | 160216.0 | Other-Grad | 13.0 | Never-married | Exec-managerial | Not-in-family | White | Female | 0.0 | 0.0 | 55.0 | United-States | >50K |
48819 | 30.0 | Private | 345898.0 | HS-grad | 9.0 | Never-married | Craft-repair | Not-in-family | Black | Male | 0.0 | 0.0 | 46.0 | United-States | <=50K |
48820 | 38.0 | Private | 139180.0 | Other-Grad | 13.0 | Divorced | Prof-specialty | Unmarried | Black | Female | 15020.0 | 0.0 | 45.0 | United-States | >50K |
48821 | 71.0 | ? | 287372.0 | Other-Grad | 16.0 | Married-civ-spouse | ? | Husband | White | Male | 0.0 | 0.0 | 10.0 | United-States | >50K |
48822 | 45.0 | State-gov | 252208.0 | HS-grad | 9.0 | Separated | Adm-clerical | Own-child | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
48823 | 41.0 | ? | 202822.0 | HS-grad | 9.0 | Separated | ? | Not-in-family | Black | Female | 0.0 | 0.0 | 32.0 | United-States | <=50K |
48824 | 72.0 | ? | 129912.0 | HS-grad | 9.0 | Married-civ-spouse | ? | Husband | White | Male | 0.0 | 0.0 | 25.0 | United-States | <=50K |
48825 | 45.0 | Local-gov | 119199.0 | Other-Grad | 12.0 | Divorced | Prof-specialty | Unmarried | White | Female | 0.0 | 0.0 | 48.0 | United-States | <=50K |
48826 | 31.0 | Private | 199655.0 | Other-Grad | 14.0 | Divorced | Other-service | Not-in-family | Other | Female | 0.0 | 0.0 | 30.0 | United-States | <=50K |
48827 | 39.0 | Local-gov | 111499.0 | Other-Grad | 12.0 | Married-civ-spouse | Adm-clerical | Wife | White | Female | 0.0 | 0.0 | 20.0 | United-States | >50K |
48828 | 37.0 | Private | 198216.0 | Other-Grad | 12.0 | Divorced | Tech-support | Not-in-family | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
48829 | 43.0 | Private | 260761.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Non-US | <=50K |
48830 | 65.0 | Self-emp-not-inc | 99359.0 | Other-Grad | 15.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 1086.0 | 0.0 | 60.0 | United-States | <=50K |
48831 | 43.0 | State-gov | 255835.0 | Some-college | 10.0 | Divorced | Adm-clerical | Other-relative | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
48832 | 43.0 | Self-emp-not-inc | 27242.0 | Some-college | 10.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 50.0 | United-States | <=50K |
48833 | 32.0 | Private | 34066.0 | 10th | 6.0 | Married-civ-spouse | Handlers-cleaners | Husband | Amer-Indian-Eskimo | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
48834 | 43.0 | Private | 84661.0 | Other-Grad | 11.0 | Married-civ-spouse | Sales | Husband | White | Male | 0.0 | 0.0 | 45.0 | United-States | <=50K |
48835 | 32.0 | Private | 116138.0 | Other-Grad | 14.0 | Never-married | Tech-support | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 11.0 | Non-US | <=50K |
48836 | 53.0 | Private | 321865.0 | Other-Grad | 14.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
48837 | 22.0 | Private | 310152.0 | Some-college | 10.0 | Never-married | Protective-serv | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | United-States | <=50K |
48838 | 27.0 | Private | 257302.0 | Other-Grad | 12.0 | Married-civ-spouse | Tech-support | Wife | White | Female | 0.0 | 0.0 | 38.0 | United-States | <=50K |
48839 | 40.0 | Private | 154374.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | United-States | >50K |
48840 | 58.0 | Private | 151910.0 | HS-grad | 9.0 | Widowed | Adm-clerical | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | United-States | <=50K |
48841 | 22.0 | Private | 201490.0 | HS-grad | 9.0 | Never-married | Adm-clerical | Own-child | White | Male | 0.0 | 0.0 | 20.0 | United-States | <=50K |
48842 | 52.0 | Self-emp-inc | 287927.0 | HS-grad | 9.0 | Married-civ-spouse | Exec-managerial | Wife | White | Female | 15024.0 | 0.0 | 40.0 | United-States | >50K |
48843 rows × 15 columns
# Select the rows of the original (non-aggregated) frame whose native-country
# is anything but 'United-States', keeping the individual country names.
# NOTE(review): rows with the unknown marker '?' are part of this subset too.
born_elsewhere = adult['native-country'].ne('United-States')
adult_immigrants = adult.loc[born_elsewhere]
adult_immigrants
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20 | 40.0 | Private | 85019.0 | Doctorate | 16.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 45.0 | ? | >50K |
24 | 25.0 | Private | 220931.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 0.0 | 0.0 | 43.0 | Peru | <=50K |
38 | 22.0 | Private | 248446.0 | 5th-6th | 3.0 | Never-married | Priv-house-serv | Not-in-family | White | Male | 0.0 | 0.0 | 50.0 | Guatemala | <=50K |
47 | 39.0 | Private | 290208.0 | 7th-8th | 4.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
55 | 38.0 | Private | 219446.0 | 9th | 5.0 | Married-spouse-absent | Exec-managerial | Not-in-family | White | Male | 0.0 | 0.0 | 54.0 | Mexico | <=50K |
66 | 41.0 | Private | 109912.0 | Bachelors | 13.0 | Never-married | Other-service | Not-in-family | White | Female | 0.0 | 0.0 | 40.0 | ? | <=50K |
73 | 30.0 | Private | 229636.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
75 | 46.0 | Private | 269034.0 | Some-college | 10.0 | Married-civ-spouse | Craft-repair | Husband | Other | Male | 0.0 | 0.0 | 40.0 | Dominican-Republic | <=50K |
84 | 44.0 | Self-emp-inc | 223881.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 99999.0 | 0.0 | 50.0 | ? | >50K |
129 | 27.0 | Self-emp-not-inc | 115438.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 60.0 | Ireland | >50K |
142 | 18.0 | Private | 262118.0 | Some-college | 10.0 | Never-married | Adm-clerical | Own-child | Asian-Pac-Islander | Female | 0.0 | 0.0 | 22.0 | Germany | <=50K |
189 | 34.0 | State-gov | 513100.0 | Bachelors | 13.0 | Married-spouse-absent | Farming-fishing | Not-in-family | Black | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
218 | 21.0 | Private | 447488.0 | 5th-6th | 3.0 | Never-married | Machine-op-inspct | Other-relative | White | Male | 0.0 | 0.0 | 38.0 | Mexico | <=50K |
221 | 34.0 | Private | 162312.0 | Bachelors | 13.0 | Married-civ-spouse | Adm-clerical | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 40.0 | Philippines | <=50K |
222 | 25.0 | Private | 77698.0 | HS-grad | 9.0 | Never-married | Machine-op-inspct | Not-in-family | Asian-Pac-Islander | Female | 0.0 | 0.0 | 40.0 | Philippines | <=50K |
233 | 55.0 | Private | 119751.0 | Masters | 14.0 | Never-married | Exec-managerial | Unmarried | Asian-Pac-Islander | Female | 0.0 | 0.0 | 50.0 | Thailand | <=50K |
238 | 42.0 | Private | 227968.0 | HS-grad | 9.0 | Never-married | Other-service | Unmarried | Black | Female | 0.0 | 0.0 | 28.0 | Haiti | <=50K |
254 | 42.0 | Federal-gov | 177937.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
297 | 30.0 | Private | 236543.0 | 9th | 5.0 | Married-civ-spouse | Other-service | Husband | White | Male | 0.0 | 0.0 | 32.0 | El-Salvador | >50K |
302 | 30.0 | Private | 169269.0 | 11th | 7.0 | Never-married | Handlers-cleaners | Other-relative | White | Male | 0.0 | 1721.0 | 38.0 | Puerto-Rico | <=50K |
306 | 37.0 | Local-gov | 263690.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Unmarried | Black | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
310 | 51.0 | Self-emp-not-inc | 136708.0 | HS-grad | 9.0 | Married-civ-spouse | Sales | Husband | Asian-Pac-Islander | Male | 3103.0 | 0.0 | 84.0 | Vietnam | <=50K |
324 | 52.0 | Private | 55608.0 | 1st-4th | 2.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
325 | 26.0 | Private | 248057.0 | HS-grad | 9.0 | Separated | Handlers-cleaners | Own-child | White | Male | 0.0 | 0.0 | 40.0 | Puerto-Rico | <=50K |
330 | 64.0 | Self-emp-not-inc | 71807.0 | Doctorate | 16.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 15024.0 | 0.0 | 50.0 | ? | >50K |
337 | 44.0 | Private | 196234.0 | 9th | 5.0 | Divorced | Other-service | Unmarried | White | Female | 0.0 | 0.0 | 55.0 | Dominican-Republic | <=50K |
343 | 31.0 | Private | 179415.0 | 5th-6th | 3.0 | Married-civ-spouse | Other-service | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
351 | 45.0 | ? | 319993.0 | HS-grad | 9.0 | Widowed | ? | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
370 | 26.0 | Private | 190873.0 | 10th | 6.0 | Divorced | Other-service | Unmarried | White | Female | 0.0 | 0.0 | 40.0 | Germany | <=50K |
372 | 41.0 | Private | 203217.0 | 7th-8th | 4.0 | Separated | Craft-repair | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
48594 | 34.0 | Private | 346034.0 | 12th | 8.0 | Married-spouse-absent | Handlers-cleaners | Unmarried | White | Male | 0.0 | 0.0 | 35.0 | Mexico | <=50K |
48595 | 41.0 | Private | 144460.0 | Some-college | 10.0 | Divorced | Machine-op-inspct | Own-child | White | Male | 0.0 | 0.0 | 40.0 | Italy | <=50K |
48602 | 37.0 | Private | 328466.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 72.0 | Mexico | <=50K |
48609 | 28.0 | Private | 330466.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 40.0 | Hong | <=50K |
48614 | 26.0 | Private | 233777.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 72.0 | Mexico | <=50K |
48616 | 24.0 | Private | 176580.0 | 5th-6th | 3.0 | Married-spouse-absent | Farming-fishing | Not-in-family | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
48619 | 23.0 | Private | 194951.0 | Bachelors | 13.0 | Never-married | Prof-specialty | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 55.0 | Ireland | <=50K |
48623 | 74.0 | Self-emp-not-inc | 199136.0 | Bachelors | 13.0 | Widowed | Craft-repair | Not-in-family | White | Male | 15831.0 | 0.0 | 8.0 | Germany | >50K |
48634 | 28.0 | Private | 119793.0 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0.0 | 0.0 | 40.0 | Portugal | <=50K |
48641 | 46.0 | Private | 139514.0 | Preschool | 1.0 | Married-civ-spouse | Machine-op-inspct | Other-relative | Black | Male | 0.0 | 0.0 | 75.0 | Dominican-Republic | <=50K |
48652 | 53.0 | Self-emp-not-inc | 137547.0 | Prof-school | 15.0 | Never-married | Prof-specialty | Not-in-family | Asian-Pac-Islander | Male | 27828.0 | 0.0 | 40.0 | Philippines | >50K |
48653 | 49.0 | Self-emp-not-inc | 111959.0 | Bachelors | 13.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 60.0 | Scotland | >50K |
48658 | 40.0 | Private | 306225.0 | HS-grad | 9.0 | Divorced | Craft-repair | Not-in-family | Asian-Pac-Islander | Female | 0.0 | 0.0 | 40.0 | Japan | <=50K |
48660 | 39.0 | Private | 214896.0 | HS-grad | 9.0 | Separated | Other-service | Not-in-family | White | Female | 0.0 | 0.0 | 40.0 | El-Salvador | <=50K |
48685 | 48.0 | Private | 325372.0 | 1st-4th | 2.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Portugal | <=50K |
48695 | 45.0 | Private | 199590.0 | 5th-6th | 3.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | ? | <=50K |
48707 | 23.0 | Private | 180771.0 | 1st-4th | 2.0 | Married-civ-spouse | Machine-op-inspct | Wife | Amer-Indian-Eskimo | Female | 0.0 | 0.0 | 35.0 | Mexico | <=50K |
48710 | 39.0 | Federal-gov | 110622.0 | Bachelors | 13.0 | Married-civ-spouse | Adm-clerical | Wife | Asian-Pac-Islander | Female | 0.0 | 0.0 | 40.0 | Philippines | <=50K |
48714 | 36.0 | Private | 208068.0 | Preschool | 1.0 | Divorced | Other-service | Not-in-family | Other | Male | 0.0 | 0.0 | 72.0 | Mexico | <=50K |
48731 | 44.0 | Self-emp-inc | 71556.0 | Masters | 14.0 | Married-civ-spouse | Sales | Husband | White | Male | 0.0 | 0.0 | 50.0 | ? | >50K |
48738 | 30.0 | Self-emp-not-inc | 261943.0 | 11th | 7.0 | Married-spouse-absent | Craft-repair | Not-in-family | White | Male | 0.0 | 0.0 | 30.0 | Honduras | <=50K |
48741 | 85.0 | Private | 98611.0 | Bachelors | 13.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0.0 | 0.0 | 3.0 | Poland | <=50K |
48751 | 58.0 | Self-emp-inc | 181974.0 | Doctorate | 16.0 | Never-married | Prof-specialty | Not-in-family | White | Female | 0.0 | 0.0 | 99.0 | ? | <=50K |
48774 | 42.0 | Self-emp-not-inc | 217597.0 | HS-grad | 9.0 | Divorced | Sales | Own-child | White | Male | 0.0 | 0.0 | 50.0 | ? | <=50K |
48790 | 45.0 | Private | 155093.0 | 10th | 6.0 | Divorced | Other-service | Not-in-family | Black | Female | 0.0 | 0.0 | 38.0 | Dominican-Republic | <=50K |
48792 | 39.0 | Private | 107302.0 | HS-grad | 9.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0.0 | 0.0 | 45.0 | ? | >50K |
48807 | 81.0 | ? | 120478.0 | Assoc-voc | 11.0 | Divorced | ? | Unmarried | White | Female | 0.0 | 0.0 | 1.0 | ? | <=50K |
48815 | 54.0 | Private | 337992.0 | Bachelors | 13.0 | Married-civ-spouse | Exec-managerial | Husband | Asian-Pac-Islander | Male | 0.0 | 0.0 | 50.0 | Japan | >50K |
48829 | 43.0 | Private | 260761.0 | HS-grad | 9.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0.0 | 0.0 | 40.0 | Mexico | <=50K |
48835 | 32.0 | Private | 116138.0 | Masters | 14.0 | Never-married | Tech-support | Not-in-family | Asian-Pac-Islander | Male | 0.0 | 0.0 | 11.0 | Taiwan | <=50K |
5010 rows × 15 columns