整理了 pandas 库的常用指令
- iloc 切片,索引
# Preview rows 0-3, showing the first four and the last three columns.
print(train_data.iloc[0:4, [0, 1, 2, 3, -3, -2, -1]])
- concat 拼接
# Stack the training and test features row-wise into a single frame.
# The first column is dropped from both frames and the last column is dropped
# from train_data only (presumably the id and the label -- confirm against
# the dataset).
all_features = pd.concat((train_data.iloc[:, 1:-1], test_data.iloc[:, 1:]))
- 处理数值
# Every column whose dtype is not 'object' is treated as numeric.
numeric_features = all_features.dtypes[all_features.dtypes != 'object'].index
# Standardize each numeric column: subtract its mean, divide by its std.
all_features[numeric_features] = all_features[numeric_features].apply(
    lambda x: (x - x.mean()) / x.std())
# After standardization each column has mean 0, so filling missing values
# with 0 is equivalent to imputing the column mean.
all_features[numeric_features] = all_features[numeric_features].fillna(0)
- 处理离散值
# One-hot encode the discrete (non-numeric) columns. dummy_na=True adds an
# extra indicator column for missing values in each encoded feature.
all_features = pd.get_dummies(all_features, dummy_na=True)
# Bare expression: displays the resulting shape in a notebook/REPL and has no
# effect when run as a script.
all_features.shape
- 提取 numpy 并转换为 tensor
# all_features holds the training rows first, followed by the test rows, so
# the number of training rows is the split point.
n_train = train_data.shape[0]
# Convert to a float32 NumPy array explicitly before building the tensor:
# with recent pandas versions get_dummies produces bool columns, and a frame
# mixing float and bool columns would otherwise yield an object array that
# torch.tensor cannot convert.
train_features = torch.tensor(
    all_features[:n_train].to_numpy(dtype="float32"), dtype=torch.float32)
test_features = torch.tensor(
    all_features[n_train:].to_numpy(dtype="float32"), dtype=torch.float32)
# Labels are reshaped into a column vector of shape (n_train, 1).
train_labels = torch.tensor(
    train_data.SalePrice.values.reshape(-1, 1), dtype=torch.float32)