import warnings
warnings.filterwarnings('ignore')
import pandas as pd
df = pd.read_csv("sku_transactions_1.csv")
df
# https://www.kaggle.com/datasets/marian447/retail-store-sales-transactions
Date | Customer_ID | Transaction_ID | SKU | Quantity | Sales_Amount | |
---|---|---|---|---|---|---|
0 | 02/01/2016 | 5541 | 4892 | 29DS1 | 2.0 | 18.67 |
1 | 02/01/2016 | 3403 | 4956 | VW5YU | 2.0 | 18.27 |
2 | 02/01/2016 | 9584 | 4955 | P8FMZ | 1.0 | 52.45 |
3 | 02/01/2016 | 9584 | 4955 | LEYMS | 2.0 | 21.98 |
4 | 02/01/2016 | 8411 | 4954 | RH5J5 | 1.0 | 4.77 |
... | ... | ... | ... | ... | ... | ... |
63985 | 30/06/2016 | 16993 | 32275 | 1H4DM | 1.0 | 37.63 |
63986 | 30/06/2016 | 16993 | 32275 | QGK3S | 1.0 | 5.69 |
63987 | 30/06/2016 | 21463 | 32277 | BMGRM | 1.0 | 6.96 |
63988 | 30/06/2016 | 16993 | 32275 | LPLTZ | 4.0 | 15.60 |
63989 | 30/06/2016 | 16993 | 32275 | XFAEF | 1.0 | 6.90 |
63990 rows × 6 columns
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63990 entries, 0 to 63989
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Date            63990 non-null  object
 1   Customer_ID     63990 non-null  int64
 2   Transaction_ID  63990 non-null  int64
 3   SKU             63990 non-null  object
 4   Quantity        63990 non-null  float64
 5   Sales_Amount    63990 non-null  float64
dtypes: float64(2), int64(2), object(2)
memory usage: 2.9+ MB
df['Customer_ID'] = df['Customer_ID'].astype(str)
df['Transaction_ID'] = df['Transaction_ID'].astype(str)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63990 entries, 0 to 63989
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Date            63990 non-null  object
 1   Customer_ID     63990 non-null  object
 2   Transaction_ID  63990 non-null  object
 3   SKU             63990 non-null  object
 4   Quantity        63990 non-null  float64
 5   Sales_Amount    63990 non-null  float64
dtypes: float64(2), object(4)
memory usage: 2.9+ MB
# Describe
df.describe()
Quantity | Sales_Amount | |
---|---|---|
count | 63990.000000 | 63990.000000 |
mean | 1.463645 | 11.907501 |
std | 3.368498 | 19.128415 |
min | 0.100000 | 0.020000 |
25% | 1.000000 | 4.190000 |
50% | 1.000000 | 6.870000 |
75% | 1.000000 | 12.270000 |
max | 176.000000 | 531.270000 |
# Explore categorical data
print(f"Number of unique transactions {df['Transaction_ID'].nunique()}")
print(f"Number of unique customers {df['Customer_ID'].nunique()}")
print(f"Number of unique SKU items {df['SKU'].nunique()}")
print(f"Period of invoices {df['Date'].min()} - {df['Date'].max()}")
print(f'SKU items in data set:')
df['SKU'].value_counts()
Number of unique transactions 31694
Number of unique customers 15113
Number of unique SKU items 4448
Period of invoices 01/02/2016 - 31/05/2016
SKU items in data set:
UNJKW    911
COWU2    432
C6TXL    361
OV1P9    344
M6J9W    308
        ...
24X9E      1
DGQ1A      1
2EIBT      1
SD3UI      1
KH5IL      1
Name: SKU, Length: 4448, dtype: int64
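Note that Date is still a plain string column here, so the min() and max() above compare text rather than chronological dates. A minimal sketch of the true date range, assuming the day-first dd/mm/yyyy format seen in the preview:
# Parse the day-first date strings into real datetimes
dates = pd.to_datetime(df['Date'], format='%d/%m/%Y')
print(f"Period of transactions {dates.min():%d/%m/%Y} - {dates.max():%d/%m/%Y}")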
# df['Transaction_ID'].is_unique
df = df.set_index(['Transaction_ID', 'SKU'])
# df.index.is_unique
# Show the index
df.index
MultiIndex([( '4892', '29DS1'),
            ( '4956', 'VW5YU'),
            ( '4955', 'P8FMZ'),
            ( '4955', 'LEYMS'),
            ( '4954', 'RH5J5'),
            ( '4953', '7UQEH'),
            ( '4952', '4D95F'),
            ( '4951', 'JE62Y'),
            ( '4951', 'DWJC4'),
            ( '4951', 'W53F2'),
            ...
            ('32276', '7IE9S'),
            ('32275', 'OI6EH'),
            ('32275', 'LJ26I'),
            ('32275', '2984Y'),
            ('32275', 'LG6Y2'),
            ('32275', '1H4DM'),
            ('32275', 'QGK3S'),
            ('32277', 'BMGRM'),
            ('32275', 'LPLTZ'),
            ('32275', 'XFAEF')],
           names=['Transaction_ID', 'SKU'], length=63990)
# Get all details of a specific transaction
df.loc['32408',:]
Date | Customer_ID | Quantity | Sales_Amount | |
---|---|---|---|---|
SKU | ||||
72CDS | 30/06/2016 | 15632 | 1.0 | 0.82 |
JZJII | 30/06/2016 | 15632 | 1.0 | 0.18 |
# Range of transactions
df = df.sort_index()
df.loc['32275':'32276',:]
Date | Customer_ID | Quantity | Sales_Amount | ||
---|---|---|---|---|---|
Transaction_ID | SKU | ||||
32275 | 1H4DM | 30/06/2016 | 16993 | 1.0 | 37.63 |
2984Y | 30/06/2016 | 16993 | 3.0 | 11.65 | |
360JS | 30/06/2016 | 16993 | 3.0 | 8.18 | |
LG6Y2 | 30/06/2016 | 16993 | 3.0 | 22.09 | |
LJ26I | 30/06/2016 | 16993 | 3.0 | 64.97 | |
LPLTZ | 30/06/2016 | 16993 | 4.0 | 15.60 | |
NDCX9 | 30/06/2016 | 16993 | 1.0 | 2.84 | |
OI6EH | 30/06/2016 | 16993 | 1.0 | 27.67 | |
QGK3S | 30/06/2016 | 16993 | 1.0 | 5.69 | |
XFAEF | 30/06/2016 | 16993 | 1.0 | 6.90 | |
32276 | 7IE9S | 30/06/2016 | 16993 | 1.0 | 8.38 |
ZVTO4 | 30/06/2016 | 16993 | 1.0 | 4.57 |
# Get all transactions of a specific SKU
df.loc[:,'F9JVE',:].head()
Date | Customer_ID | Quantity | Sales_Amount | |
---|---|---|---|---|
Transaction_ID | ||||
10765 | 03/06/2016 | 9030 | 1.0 | 3.53 |
14503 | 25/03/2016 | 7090 | 1.0 | 3.53 |
14859 | 29/03/2016 | 17691 | 1.0 | 3.74 |
19345 | 22/04/2016 | 3143 | 1.0 | 3.53 |
20413 | 27/04/2016 | 17196 | 1.0 | 3.53 |
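An equivalent and perhaps more explicit way to take a cross-section at the inner SKU level is DataFrame.xs, which returns the matching rows with the SKU level dropped (a sketch):
# All transactions containing SKU 'F9JVE'
df.xs('F9JVE', level='SKU').head()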
# Get all transactions of a range of SKUs
df.loc[:,'DUV2Y':'F9JVE',:]
Date | Customer_ID | Quantity | Sales_Amount | ||
---|---|---|---|---|---|
Transaction_ID | SKU | ||||
10004 | F79YP | 03/02/2016 | 3546 | 1.0 | 14.25 |
10007 | E3PAN | 03/02/2016 | 4919 | 1.0 | 9.68 |
F7FQ5 | 03/02/2016 | 4919 | 3.0 | 7.29 | |
10012 | F64H7 | 03/02/2016 | 2465 | 1.0 | 2.86 |
10015 | EMJ1S | 03/02/2016 | 7474 | 1.0 | 3.68 |
... | ... | ... | ... | ... | ... |
9973 | ECUP5 | 03/01/2016 | 1288 | 1.0 | 12.48 |
9982 | F3M35 | 03/01/2016 | 8688 | 1.0 | 9.63 |
9984 | DWJC4 | 03/01/2016 | 3767 | 1.0 | 12.87 |
9988 | F90L2 | 03/01/2016 | 4888 | 1.0 | 2.37 |
9997 | DWYF6 | 03/01/2016 | 9165 | 1.0 | 11.93 |
3415 rows × 4 columns
df.groupby(level="Transaction_ID").sum() # sum per transaction
Quantity | Sales_Amount | |
---|---|---|
Transaction_ID | ||
1 | 1.0 | 3.13 |
10 | 2.0 | 7.49 |
100 | 1.0 | 3.34 |
10000 | 1.0 | 15.67 |
10001 | 1.0 | 8.37 |
... | ... | ... |
9995 | 2.0 | 10.81 |
9996 | 13.0 | 88.23 |
9997 | 1.0 | 11.93 |
9998 | 1.0 | 4.57 |
9999 | 3.0 | 43.83 |
31694 rows × 2 columns
df.groupby(level="SKU").sum()
Quantity | Sales_Amount | |
---|---|---|
SKU | ||
00GVC | 2.0 | 35.36 |
00OK1 | 13.0 | 16.56 |
0121I | 16.0 | 32.47 |
01IEO | 24.0 | 149.21 |
01IQT | 5.0 | 6.84 |
... | ... | ... |
ZWWB5 | 7.0 | 12.04 |
ZYF2U | 7.0 | 65.66 |
ZZ2AO | 1.0 | 24.38 |
ZZM1A | 26.0 | 164.21 |
ZZNC5 | 1.0 | 10.79 |
4448 rows × 2 columns
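The per-SKU totals can also be ranked directly, for example to pick out the top sellers by revenue (a small sketch using nlargest):
# Five SKUs with the highest total revenue
df.groupby(level="SKU")["Sales_Amount"].sum().nlargest(5)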
# Reset the index
df = df.reset_index()
df
Transaction_ID | SKU | Date | Customer_ID | Quantity | Sales_Amount | |
---|---|---|---|---|---|---|
0 | 1 | 0EM7L | 01/02/2016 | 2547 | 1.0 | 3.13 |
1 | 10 | KWDJZ | 01/02/2016 | 7548 | 1.0 | 5.38 |
2 | 10 | UNJKW | 01/02/2016 | 7548 | 1.0 | 2.11 |
3 | 100 | MRE4J | 01/02/2016 | 3131 | 1.0 | 3.34 |
4 | 10000 | P1T5K | 03/01/2016 | 5810 | 1.0 | 15.67 |
... | ... | ... | ... | ... | ... | ... |
63985 | 9997 | DWYF6 | 03/01/2016 | 9165 | 1.0 | 11.93 |
63986 | 9998 | ZVTO4 | 03/01/2016 | 2557 | 1.0 | 4.57 |
63987 | 9999 | 16GCL | 03/01/2016 | 7820 | 1.0 | 26.43 |
63988 | 9999 | DGPZK | 03/01/2016 | 7820 | 1.0 | 10.15 |
63989 | 9999 | U26S2 | 03/01/2016 | 7820 | 1.0 | 7.25 |
63990 rows × 6 columns
Merge or join operations combine datasets by linking rows using one or more keys. They are central to relational databases (e.g., SQL-based systems), and pandas provides the same functionality through pd.merge.
sku_categ = pd.read_csv("sku_categ.csv")
sku_categ.head()
SKU | SKU_Category | |
---|---|---|
0 | 0EM7L | X52 |
1 | 68BRQ | 2ML |
2 | CZUZX | 0H2 |
3 | 549KK | 0H2 |
4 | K8EHH | 0H2 |
# Merge two dataframes by key
df = pd.merge(df, sku_categ, on='SKU')
df
Transaction_ID | SKU | Date | Customer_ID | Quantity | Sales_Amount | SKU_Category | |
---|---|---|---|---|---|---|---|
0 | 1 | 0EM7L | 01/02/2016 | 2547 | 1.0 | 3.13 | X52 |
1 | 12476 | 0EM7L | 16/03/2016 | 17168 | 1.0 | 3.13 | X52 |
2 | 19516 | 0EM7L | 22/04/2016 | 15706 | 1.0 | 3.13 | X52 |
3 | 2018 | 0EM7L | 14/01/2016 | 8283 | 1.0 | 3.13 | X52 |
4 | 23824 | 0EM7L | 15/05/2016 | 19800 | 1.0 | 3.34 | X52 |
... | ... | ... | ... | ... | ... | ... | ... |
63985 | 9926 | Q309I | 03/01/2016 | 6436 | 1.0 | 4.06 | A0G |
63986 | 9956 | FN1CT | 03/01/2016 | 245 | 1.0 | 26.51 | LSD |
63987 | 9959 | 64NPA | 03/01/2016 | 1658 | 1.0 | 6.00 | JKC |
63988 | 9980 | TGW8R | 03/01/2016 | 44 | 1.0 | 3.85 | A38 |
63989 | 9992 | NVRQE | 03/01/2016 | 3275 | 1.0 | 6.25 | T80 |
63990 rows × 7 columns
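The merged frame still has 63990 rows, so every SKU found a category. Note that pd.merge defaults to an inner join: any transaction whose SKU were missing from sku_categ would be dropped silently. A hedged sketch of a safer variant, run on the transactions before df was overwritten by the merge (transactions is a hypothetical name for that pre-merge frame), using a left join plus the indicator flag to surface unmatched SKUs:
# Left join keeps every transaction; _merge flags rows whose SKU had no category
checked = pd.merge(transactions, sku_categ, on='SKU', how='left', indicator=True)
checked['_merge'].value_counts()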
df2 = pd.read_csv('sku_transactions_2.csv')
df2
Date | Customer_ID | Transaction_ID | SKU_Category | SKU | Quantity | Sales_Amount | |
---|---|---|---|---|---|---|---|
0 | 2016-07-01 | 5694 | 731 | 0H2 | 51OBI | 1.0 | 6.75 |
1 | 2016-07-01 | 5694 | 731 | 3WV | WKRVM | 1.0 | 4.75 |
2 | 2016-07-01 | 1886 | 732 | BZU | 68AG2 | 1.0 | 7.23 |
3 | 2016-07-01 | 1569 | 733 | FU5 | 3ZY0H | 1.0 | 5.01 |
4 | 2016-07-01 | 8837 | 734 | XVK | 01IEO | 1.0 | 4.96 |
... | ... | ... | ... | ... | ... | ... | ... |
67711 | 2016-12-31 | 16860 | 64622 | R6E | V6P7N | 1.0 | 2.13 |
67712 | 2016-12-31 | 16860 | 64622 | R6E | F90L2 | 1.0 | 2.49 |
67713 | 2016-12-31 | 16860 | 64622 | SFC | AM6EH | 1.0 | 2.86 |
67714 | 2016-12-31 | 17306 | 64616 | C8Z | 520UE | 1.0 | 8.49 |
67715 | 2016-12-31 | 13935 | 64609 | 0H2 | 6R0Z5 | 1.0 | 6.12 |
67716 rows × 7 columns
pd.concat([df, df2])
Transaction_ID | SKU | Date | Customer_ID | Quantity | Sales_Amount | SKU_Category | |
---|---|---|---|---|---|---|---|
0 | 1 | 0EM7L | 01/02/2016 | 2547 | 1.0 | 3.13 | X52 |
1 | 12476 | 0EM7L | 16/03/2016 | 17168 | 1.0 | 3.13 | X52 |
2 | 19516 | 0EM7L | 22/04/2016 | 15706 | 1.0 | 3.13 | X52 |
3 | 2018 | 0EM7L | 14/01/2016 | 8283 | 1.0 | 3.13 | X52 |
4 | 23824 | 0EM7L | 15/05/2016 | 19800 | 1.0 | 3.34 | X52 |
... | ... | ... | ... | ... | ... | ... | ... |
67711 | 64622 | V6P7N | 2016-12-31 | 16860 | 1.0 | 2.13 | R6E |
67712 | 64622 | F90L2 | 2016-12-31 | 16860 | 1.0 | 2.49 | R6E |
67713 | 64622 | AM6EH | 2016-12-31 | 16860 | 1.0 | 2.86 | SFC |
67714 | 64616 | 520UE | 2016-12-31 | 17306 | 1.0 | 8.49 | C8Z |
67715 | 64609 | 6R0Z5 | 2016-12-31 | 13935 | 1.0 | 6.12 | 0H2 |
131706 rows × 7 columns
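pd.concat keeps each frame's original row labels, which is why the combined index above still ends at 67715 rather than 131705; passing ignore_index=True builds a fresh RangeIndex instead. Note also that the two files store dates in different formats (dd/mm/yyyy vs. yyyy-mm-dd), so the Date column would need parsing before any date-based work. A minimal sketch of the concatenation with a clean index:
# Stack the two transaction sets with a fresh 0..131705 index
combined = pd.concat([df, df2], ignore_index=True)
combined.shape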
import numpy as np
data = pd.DataFrame(np.arange(6).reshape((2, 3)),
index=pd.Index(["Ohio", "Colorado"], name="state"),
columns=pd.Index(["one", "two", "three"],
name="number"))
data
number | one | two | three |
---|---|---|---|
state | |||
Ohio | 0 | 1 | 2 |
Colorado | 3 | 4 | 5 |
# Stack dataframe
result = data.stack()
result
state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int64
# Unstack
result.unstack()
number | one | two | three |
---|---|---|---|
state | |||
Ohio | 0 | 1 | 2 |
Colorado | 3 | 4 | 5 |
By default, the innermost level (number here) is unstacked.
# Unstack a different level
result.unstack(level=0)
state | Ohio | Colorado |
---|---|---|
number | ||
one | 0 | 3 |
two | 1 | 4 |
three | 2 | 5 |
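The level to unstack can also be selected by name instead of position; here unstacking "state" is equivalent to level=0:
# Same pivot, selecting the level by its name
result.unstack(level="state")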
import pandas as pd
import numpy as np
df = pd.DataFrame({"key1" : ["a", "a", None, "b", "b", "a", None],
"key2" : pd.Series([1, 2, 1, 2, 1, None, 1], dtype="Int64"),
"data1" : np.random.standard_normal(7),
"data2" : np.random.standard_normal(7)})
df
key1 | key2 | data1 | data2 | |
---|---|---|---|---|
0 | a | 1 | -0.831921 | -0.285859 |
1 | a | 2 | -0.775401 | -0.165066 |
2 | None | 1 | -0.912723 | 0.638457 |
3 | b | 2 | -0.815619 | -0.579768 |
4 | b | 1 | 0.677665 | 0.785730 |
5 | a | <NA> | 0.805057 | 0.293752 |
6 | None | 1 | 0.006672 | 0.336428 |
Compute the mean of the data1 column using the labels from key1
grouped = df["data1"].groupby(df["key1"])
grouped
<pandas.core.groupby.generic.SeriesGroupBy object at 0x7fa672b27c10>
Nothing has been computed yet; the GroupBy object simply holds all of the information needed to apply an operation to each of the groups.
# Compute the mean
grouped.mean()
key1
a   -0.267422
b   -0.068977
Name: data1, dtype: float64
# Group by multiple keys (levels)
means = df["data1"].groupby([df["key1"], df["key2"]]).mean()
means
key1  key2
a     1      -0.831921
      2      -0.775401
b     1       0.677665
      2      -0.815619
Name: data1, dtype: float64
Any missing values in a group key are excluded from the result by default.
# Group by multiple keys (levels), keeping missing keys
means = df["data1"].groupby([df["key1"], df["key2"]], dropna=False).mean()
means
key1  key2
a     1       -0.831921
      2       -0.775401
      <NA>     0.805057
b     1        0.677665
      2       -0.815619
NaN   1       -0.453026
Name: data1, dtype: float64
# Unstack the previous result
means.unstack()
key2 | 1 | 2 | <NA> |
---|---|---|---|
key1 | |||
a | -0.831921 | -0.775401 | 0.805057 |
b | 0.677665 | -0.815619 | NaN |
NaN | -0.453026 | NaN | NaN |
The object returned by groupby supports iteration, generating a sequence of 2-tuples containing the group name along with the chunk of data.
for name, group in df.groupby("key1"):
print(name)
print(group)
a
  key1  key2     data1     data2
0    a     1 -0.831921 -0.285859
1    a     2 -0.775401 -0.165066
5    a  <NA>  0.805057  0.293752
b
  key1  key2     data1     data2
3    b     2 -0.815619 -0.579768
4    b     1  0.677665  0.785730
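Since iteration yields (name, group) pairs, a convenient one-liner is to materialize the groups into a dict keyed by group name (a sketch):
# Dict mapping each key1 value to its sub-DataFrame
pieces = {name: group for name, group in df.groupby("key1")}
pieces["b"]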
people = pd.DataFrame(np.random.standard_normal((5, 5)),
columns=["a", "b", "c", "d", "e"],
index=["Joe", "Steve", "Wanda", "Jill", "Trey"])
people.iloc[2:3, [1, 2]] = np.nan
people
a | b | c | d | e | |
---|---|---|---|---|---|
Joe | 0.268135 | -0.236627 | -0.992172 | -0.120391 | -1.556781 |
Steve | 0.558533 | 0.603487 | 1.312584 | 1.081793 | 0.818569 |
Wanda | 1.258828 | NaN | NaN | -0.946992 | -2.048267 |
Jill | -0.268992 | 0.357191 | -1.020337 | 0.239886 | -0.251390 |
Trey | 1.488786 | -0.584070 | -0.113982 | 0.882762 | 0.161310 |
# Group by columns
mapping = {"a": "red", "b": "red", "c": "blue", "d": "blue", "e": "red", "f" : "orange"}
people.groupby(mapping, axis="columns").sum()
blue | red | |
---|---|---|
Joe | -1.112563 | -1.525273 |
Steve | 2.394378 | 1.980589 |
Wanda | -0.946992 | -0.789439 |
Jill | -0.780450 | -0.163190 |
Trey | 0.768780 | 1.066027 |
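Newer pandas releases (2.1+) have deprecated axis="columns" in groupby; if that warning appears, one workaround (a sketch, same result) is to transpose, group the rows by the same mapping, and transpose back:
# Column-wise sums by color group without axis="columns"
people.T.groupby(mapping).sum().T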
Aggregations refer to any data transformation that produces scalar values from arrays. Many common aggregations (mean, sum, count, and so on) are built in, but you can also define your own:
def peak_to_peak(arr):
return arr.max() - arr.min()
df.groupby('key1').agg(peak_to_peak)
key2 | data1 | data2 | |
---|---|---|---|
key1 | |||
a | 1 | 1.636979 | 0.579612 |
b | 1 | 1.493284 | 1.365498 |
df.groupby('key1').describe()
key2 | data1 | data2 | |||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | count | mean | ... | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | |
key1 | |||||||||||||||||||||
a | 2.0 | 1.5 | 0.707107 | 1.0 | 1.25 | 1.5 | 1.75 | 2.0 | 3.0 | -0.267422 | ... | 0.014828 | 0.805057 | 3.0 | -0.052391 | 0.305793 | -0.285859 | -0.225463 | -0.165066 | 0.064343 | 0.293752 |
b | 2.0 | 1.5 | 0.707107 | 1.0 | 1.25 | 1.5 | 1.75 | 2.0 | 2.0 | -0.068977 | ... | 0.304344 | 0.677665 | 2.0 | 0.102981 | 0.965553 | -0.579768 | -0.238393 | 0.102981 | 0.444356 | 0.785730 |
2 rows × 24 columns
df = pd.read_csv("ds_salaries.csv")
df.head()
#https://www.kaggle.com/datasets/zain280/data-science-salaries
id | work_year | experience_level | employment_type | job_title | salary | salary_currency | salary_in_usd | employee_residence | remote_ratio | company_location | company_size | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2020 | MI | FT | Data Scientist | 70000 | EUR | 79833 | DE | 0 | DE | L |
1 | 1 | 2020 | SE | FT | Machine Learning Scientist | 260000 | USD | 260000 | JP | 0 | JP | S |
2 | 2 | 2020 | SE | FT | Big Data Engineer | 85000 | GBP | 109024 | GB | 50 | GB | M |
3 | 3 | 2020 | MI | FT | Product Data Analyst | 20000 | USD | 20000 | HN | 0 | HN | S |
4 | 4 | 2020 | SE | FT | Machine Learning Engineer | 150000 | USD | 150000 | US | 50 | US | L |
Question: What is the average salary for 'Data Scientist'?
df.groupby('job_title').mean().loc['Data Scientist']
# df['job_title'].value_counts()
id                   314.832168
work_year           2021.391608
salary            508347.202797
salary_in_usd     108187.832168
remote_ratio          63.986014
Name: Data Scientist, dtype: float64
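Depending on the pandas version, calling .mean() on the whole grouped frame may complain about the non-numeric columns (experience_level, job_title, currency codes, and so on); selecting the column of interest first sidesteps that and answers the question more directly. A sketch:
# Average USD salary per job title, then pick out Data Scientist
df.groupby('job_title')['salary_in_usd'].mean().loc['Data Scientist']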
Question: Are salaries changing between 2020 and 2022, across all job titles?
df.groupby(['job_title','work_year']).median()
id | salary | salary_in_usd | remote_ratio | ||
---|---|---|---|---|---|
job_title | work_year | ||||
3D Computer Vision Researcher | 2021 | 77.0 | 400000.0 | 5409.0 | 50.0 |
AI Scientist | 2020 | 52.0 | 300000.0 | 45896.0 | 50.0 |
2021 | 178.5 | 33500.0 | 15026.5 | 100.0 | |
2022 | 498.5 | 160000.0 | 160000.0 | 50.0 | |
Analytics Engineer | 2022 | 464.0 | 179850.0 | 179850.0 | 50.0 |
... | ... | ... | ... | ... | ... |
Product Data Analyst | 2020 | 12.0 | 235000.0 | 13036.0 | 50.0 |
Research Scientist | 2020 | 29.5 | 246000.0 | 246000.0 | 25.0 |
2021 | 198.5 | 64999.5 | 66904.5 | 50.0 | |
2022 | 502.5 | 102500.0 | 106713.5 | 50.0 | |
Staff Data Scientist | 2021 | 283.0 | 105000.0 | 105000.0 | 100.0 |
98 rows × 4 columns
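To compare years side by side, the same grouped median can be unstacked into one column per work_year (a sketch focusing on salary_in_usd):
# Median USD salary per job title, one column per year
df.groupby(['job_title', 'work_year'])['salary_in_usd'].median().unstack()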
Question: Would you prefer to work at a large company for a higher salary?
df.groupby('company_size').median()
id | work_year | salary | salary_in_usd | remote_ratio | |
---|---|---|---|---|---|
company_size | |||||
L | 194.5 | 2021.0 | 120250.0 | 100000.0 | 100.0 |
M | 392.5 | 2022.0 | 116075.0 | 113188.0 | 100.0 |
S | 149.0 | 2021.0 | 80000.0 | 65000.0 | 100.0 |
Question: Show the top 5 salaries from each job title
def find_top_5(arr):
return arr.sort_values('salary_in_usd', ascending=False)[:5]
df.groupby('job_title').apply(find_top_5).loc['Data Scientist']
id | work_year | experience_level | employment_type | job_title | salary | salary_currency | salary_in_usd | employee_residence | remote_ratio | company_location | company_size | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
63 | 63 | 2020 | SE | FT | Data Scientist | 412000 | USD | 412000 | US | 100 | US | L |
416 | 416 | 2022 | SE | FT | Data Scientist | 260000 | USD | 260000 | US | 100 | US | M |
486 | 486 | 2022 | SE | FT | Data Scientist | 230000 | USD | 230000 | US | 100 | US | M |
592 | 592 | 2022 | SE | FT | Data Scientist | 230000 | USD | 230000 | US | 100 | US | M |
472 | 472 | 2022 | SE | FT | Data Scientist | 220000 | USD | 220000 | US | 100 | US | M |
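A shorter alternative to the custom apply, if you prefer built-ins, is to sort once and take the first five rows of each group (a sketch; filtering afterwards mirrors the Data Scientist view shown above):
# Top 5 rows by salary_in_usd within each job title
top5 = df.sort_values('salary_in_usd', ascending=False).groupby('job_title').head(5)
top5[top5['job_title'] == 'Data Scientist']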