diff --git a/BenchmarkScripts/ScanNet200/README.md b/BenchmarkScripts/ScanNet200/README.md
new file mode 100644
index 0000000..f175272
--- /dev/null
+++ b/BenchmarkScripts/ScanNet200/README.md
@@ -0,0 +1,60 @@
+# ScanNet200 Preprocessing Scripts and Description
+
+We provide the preprocessing scripts and benchmark data for the ScanNet200 benchmark.
+The raw scans and annotations are shared with the original [ScanNet benchmark](http://kaldir.vc.in.tum.de/scannet_benchmark); the scripts here produce semantic- and instance-labeled meshes according to the ScanNet200 categories.
+The ScanNet scene meshes are surface-annotated, i.e. every vertex carries a raw category id.
+These ids can be parsed using the mapping defined in `scannetv2-labels.combined.tsv` (a lookup sketch is shown below).
+
+**Important Note:** The `scannetv2-labels.combined.tsv` file was updated with the introduction of the ScanNet200 benchmark; please download the latest version using the script obtained after filling out the [Terms of Use form](https://github.com/ScanNet/ScanNet#scannet-data).
+
+
+Differences and similarities to the original benchmark:
+ - The ScanNet200 benchmark evaluates 200 categories, an order of magnitude more than the original set of 20 classical semantic labels.
+ - The new benchmark follows the original _train_/_val_/_test_ scene splits published in this repository.
+ - We present a further split of the categories into three sets based on their point and instance frequencies, namely **head**, **common**, and **tail**. The category splits can be found in the `scannet200_splits.py` file.
+ - The raw annotations in the training set contain 550 distinct categories, many of which appear only once; these were filtered to produce the large-vocabulary, challenging ScanNet200 setting. The mapping of annotation category ids to the valid ScanNet200 categories can be found in `scannet200_constants.py`.
+ - This larger vocabulary exhibits a strong natural imbalance and diversity, making it a challenging scenario for evaluating modern 3D scene understanding methods.
+
+![](docs/dataset_histograms.jpg)
+
+We provide the script for preprocessing and parsing the scene meshes into semantically and instance-labeled meshes in `preprocess_scannet200.py`.
+Additionally, helper functions, such as mesh voxelization, can be found in `utils.py`.
+
+### Running the preprocessing
+
+The scripts were developed and tested with Python 3; basic libraries like _pandas_ and _plyfile_ are expected to be installed.
+Additionally, we rely on _trimesh_ and _MinkowskiEngine_ for uniform mesh voxelization, but these libraries are not strictly necessary.
+
+To install all required libraries:
+
+```
+conda create -n scannet200 python=3.8
+conda activate scannet200
+pip install -r requirements.txt
+```
+
+For the optional MinkowskiEngine, required only by the voxelization utility, please refer to the [installation guide](https://github.com/NVIDIA/MinkowskiEngine#anaconda) corresponding to your specific GPU and CUDA version.
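+
+As a quick sanity check of the label mapping, a raw annotation category can be mapped to its ScanNet200 id and name directly from the TSV. The following is a minimal sketch mirroring the lookup in `utils.py` (`point_indices_from_group`); the TSV path and the example category name are placeholders:
+
+```
+import pandas as pd
+
+from scannet200_constants import VALID_CLASS_IDS_200, CLASS_LABELS_200
+
+labels_pd = pd.read_csv('scannetv2-labels.combined.tsv', sep='\t', header=0)
+
+# Look up the id of a raw annotation category, as the preprocessing script does
+raw_name = 'office chair'
+ids = labels_pd.loc[labels_pd['raw_category'] == raw_name, 'id']
+label_id = int(ids.iloc[0]) if len(ids) > 0 else 0
+
+# Ids outside the valid set are treated as unlabeled (0) during preprocessing
+if label_id in VALID_CLASS_IDS_200:
+    print(label_id, CLASS_LABELS_200[VALID_CLASS_IDS_200.index(label_id)])
+else:
+    print('unlabeled / filtered out of ScanNet200')
+```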
+
+
+The preprocessing can be started with
+
+```
+python preprocess_scannet200.py --dataset_root <scannet_root> \
+                                --output_root <output_root> \
+                                --label_map_file <path/to/scannetv2-labels.combined.tsv>
+```
+
+Script options:
+```
+--dataset_root:
+        Path to the ScanNet dataset containing the scene folders
+--output_root:
+        Output path where the train/val folders will be located
+--label_map_file:
+        Path to the updated scannetv2-labels.combined.tsv
+--num_workers:
+        The number of parallel workers for multiprocessing
+        default=4
+--train_val_splits_path:
+        Where the txt files with the train/val splits live
+        default='../../Tasks/Benchmark'
+```
diff --git a/BenchmarkScripts/ScanNet200/docs/dataset_histograms.jpg b/BenchmarkScripts/ScanNet200/docs/dataset_histograms.jpg
new file mode 100644
index 0000000..227baaa
Binary files /dev/null and b/BenchmarkScripts/ScanNet200/docs/dataset_histograms.jpg differ
diff --git a/BenchmarkScripts/ScanNet200/preprocess_scannet200.py b/BenchmarkScripts/ScanNet200/preprocess_scannet200.py
new file mode 100644
index 0000000..6f0e2bb
--- /dev/null
+++ b/BenchmarkScripts/ScanNet200/preprocess_scannet200.py
@@ -0,0 +1,133 @@
+import warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+import os
+import argparse
+import glob
+import json
+from concurrent.futures import ProcessPoolExecutor
+from itertools import repeat
+
+import numpy as np
+import pandas as pd
+
+# Load external constants
+from scannet200_constants import *
+from scannet200_splits import *
+from utils import *
+
+CLOUD_FILE_PFIX = '_vh_clean_2'
+SEGMENTS_FILE_PFIX = '.0.010000.segs.json'
+AGGREGATIONS_FILE_PFIX = '.aggregation.json'
+CLASS_IDs = VALID_CLASS_IDS_200
+
+def handle_process(scene_path, output_path, labels_pd, train_scenes, val_scenes):
+
+    scene_id = scene_path.split('/')[-1]
+    mesh_path = os.path.join(scene_path, f'{scene_id}{CLOUD_FILE_PFIX}.ply')
+    segments_file = os.path.join(scene_path, f'{scene_id}{CLOUD_FILE_PFIX}{SEGMENTS_FILE_PFIX}')
+    aggregations_file = os.path.join(scene_path, f'{scene_id}{AGGREGATIONS_FILE_PFIX}')
+    info_file = os.path.join(scene_path, f'{scene_id}.txt')
+
+    if scene_id in train_scenes:
+        output_file = os.path.join(output_path, 'train', f'{scene_id}.ply')
+        split_name = 'train'
+    elif scene_id in val_scenes:
+        output_file = os.path.join(output_path, 'val', f'{scene_id}.ply')
+        split_name = 'val'
+    else:
+        output_file = os.path.join(output_path, 'test', f'{scene_id}.ply')
+        split_name = 'test'
+
+    print('Processing:', scene_id, 'in', split_name)
+
+    # Read the axis alignment from the scene metadata
+    info_dict = {}
+    with open(info_file) as f:
+        for line in f:
+            (key, val) = line.split(" = ")
+            info_dict[key] = np.fromstring(val, sep=' ')
+
+    if 'axisAlignment' not in info_dict:
+        rot_matrix = np.identity(4)
+    else:
+        rot_matrix = info_dict['axisAlignment'].reshape(4, 4)
+
+    pointcloud, faces_array = read_plymesh(mesh_path)
+    # points/colors in the original frame are used by the optional voxelization below
+    points = pointcloud[:, :3]
+    colors = pointcloud[:, 3:6]
+
+    # Rotate the point cloud to axis-aligned coordinates (in homogeneous coordinates)
+    r_points = pointcloud[:, :3].transpose()
+    r_points = np.append(r_points, np.ones((1, r_points.shape[1])), axis=0)
+    r_points = np.dot(rot_matrix, r_points)
+    pointcloud = np.append(r_points.transpose()[:, :3], pointcloud[:, 3:], axis=1)
+
+    # Load the over-segmentation (a segment id per vertex)
+    with open(segments_file) as f:
+        segments = json.load(f)
+    seg_indices = np.array(segments['segIndices'])
+
+    # Load the aggregated annotations (groups of segments forming object instances)
+    with open(aggregations_file) as f:
+        aggregation = json.load(f)
+    seg_groups = np.array(aggregation['segGroups'])
+
+    # Generate semantic and instance labels per vertex
+    labelled_pc = np.zeros((pointcloud.shape[0], 1))
+    instance_ids = np.zeros((pointcloud.shape[0], 1))
+    for group in seg_groups:
+        segment_points, p_inds, label_id = point_indices_from_group(pointcloud, seg_indices, group, labels_pd, CLASS_IDs)
+
+        labelled_pc[p_inds] = label_id
+        instance_ids[p_inds] = group['id']
+
+    labelled_pc = labelled_pc.astype(int)
+    instance_ids = instance_ids.astype(int)
+
+    # Concatenate with the original cloud: x, y, z, r, g, b, label, instance_id
+    processed_vertices = np.hstack((pointcloud[:, :6], labelled_pc, instance_ids))
+
+    if np.any(np.isnan(processed_vertices)) or not np.all(np.isfinite(processed_vertices)):
+        raise ValueError(f'Found NaN or infinite vertex value in {scene_id}')
+
+    # Save the processed mesh
+    save_plymesh(processed_vertices, faces_array, output_file, with_label=True, verbose=False)
+
+    # Uncomment the following lines to save the output as a voxelized point cloud instead
+    # quantized_points, quantized_scene_colors, quantized_labels, quantized_instances = voxelize_pointcloud(points, colors, labelled_pc, instance_ids, faces_array)
+    # quantized_pc = np.hstack((quantized_points, quantized_scene_colors, quantized_labels, quantized_instances))
+    # save_plymesh(quantized_pc, faces=None, filename=output_file, with_label=True, verbose=False)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dataset_root', required=True, help='Path to the ScanNet dataset containing the scene folders')
+    parser.add_argument('--output_root', required=True, help='Output path where the train/val folders will be located')
+    parser.add_argument('--label_map_file', required=True, help='Path to the updated scannetv2-labels.combined.tsv')
+    parser.add_argument('--num_workers', default=4, type=int, help='The number of parallel workers')
+    parser.add_argument('--train_val_splits_path', default='../../Tasks/Benchmark', help='Where the txt files with the train/val splits live')
+    config = parser.parse_args()
+
+    # Load label map
+    labels_pd = pd.read_csv(config.label_map_file, sep='\t', header=0)
+
+    # Load train/val splits
+    with open(os.path.join(config.train_val_splits_path, 'scannetv2_train.txt')) as train_file:
+        train_scenes = train_file.read().splitlines()
+    with open(os.path.join(config.train_val_splits_path, 'scannetv2_val.txt')) as val_file:
+        val_scenes = val_file.read().splitlines()
+
+    # Create the output directories (scenes outside the train/val lists are written to test/)
+    for split_name in ('train', 'val', 'test'):
+        os.makedirs(os.path.join(config.output_root, split_name), exist_ok=True)
+
+    # Load scene paths
+    scene_paths = sorted(glob.glob(config.dataset_root + '/*'))
+
+    # Preprocess data.
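+    # pool.map iterates over the scene paths, while itertools.repeat broadcasts
+    # the fixed arguments (output root, label map, split lists) to every
+    # handle_process call; wrapping the result in list() surfaces worker exceptions.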
+ pool = ProcessPoolExecutor(max_workers=config.num_workers) + print('Processing scenes...') + _ = list(pool.map(handle_process, scene_paths, repeat(config.output_root), repeat(labels_pd), repeat(train_scenes), repeat(val_scenes))) diff --git a/BenchmarkScripts/ScanNet200/requirements.txt b/BenchmarkScripts/ScanNet200/requirements.txt new file mode 100644 index 0000000..ccf5e0a --- /dev/null +++ b/BenchmarkScripts/ScanNet200/requirements.txt @@ -0,0 +1,15 @@ +certifi==2022.5.18.1 +joblib==1.1.0 +numpy==1.22.4 +pandas==1.4.2 +pip==21.2.4 +plyfile==0.7.4 +python-dateutil==2.8.2 +pytz==2022.1 +scikit-learn==1.1.1 +scipy==1.8.1 +setuptools==61.2.0 +six==1.16.0 +threadpoolctl==3.1.0 +trimesh==3.12.6 +wheel==0.37.1 diff --git a/BenchmarkScripts/ScanNet200/scannet200_constants.py b/BenchmarkScripts/ScanNet200/scannet200_constants.py new file mode 100644 index 0000000..388c497 --- /dev/null +++ b/BenchmarkScripts/ScanNet200/scannet200_constants.py @@ -0,0 +1,277 @@ +### ScanNet Benchmark constants ### +VALID_CLASS_IDS_20 = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39) + +CLASS_LABELS_20 = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', + 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator', + 'shower curtain', 'toilet', 'sink', 'bathtub', 'otherfurniture') + +SCANNET_COLOR_MAP_20 = { + 0: (0., 0., 0.), + 1: (174., 199., 232.), + 2: (152., 223., 138.), + 3: (31., 119., 180.), + 4: (255., 187., 120.), + 5: (188., 189., 34.), + 6: (140., 86., 75.), + 7: (255., 152., 150.), + 8: (214., 39., 40.), + 9: (197., 176., 213.), + 10: (148., 103., 189.), + 11: (196., 156., 148.), + 12: (23., 190., 207.), + 14: (247., 182., 210.), + 15: (66., 188., 102.), + 16: (219., 219., 141.), + 17: (140., 57., 197.), + 18: (202., 185., 52.), + 19: (51., 176., 203.), + 20: (200., 54., 131.), + 21: (92., 193., 61.), + 22: (78., 71., 183.), + 23: (172., 114., 82.), + 24: (255., 127., 14.), + 25: (91., 163., 138.), + 26: (153., 98., 156.), + 27: (140., 153., 101.), + 28: (158., 218., 229.), + 29: (100., 125., 154.), + 30: (178., 127., 135.), + 32: (146., 111., 194.), + 33: (44., 160., 44.), + 34: (112., 128., 144.), + 35: (96., 207., 209.), + 36: (227., 119., 194.), + 37: (213., 92., 176.), + 38: (94., 106., 211.), + 39: (82., 84., 163.), + 40: (100., 85., 144.), +} + +### ScanNet200 Benchmark constants ### +VALID_CLASS_IDS_200 = ( +1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, +72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112, 115, 116, 118, 120, 121, 122, 125, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 145, 148, 154, +155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 232, 233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323, 325, 331, 342, 356, 370, 392, 395, 399, 408, 417, +488, 540, 562, 570, 572, 581, 609, 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191) + +CLASS_LABELS_200 = ( +'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf', 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window', 'toilet', 'bookshelf', 'monitor', 
'curtain', 'book', 'armchair', 'coffee table', 'box', +'refrigerator', 'lamp', 'kitchen cabinet', 'towel', 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion', 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard', 'bag', 'backpack', 'toilet paper', +'printer', 'tv stand', 'whiteboard', 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave', 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench', 'board', 'washing machine', 'mirror', 'copier', +'basket', 'sofa chair', 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person', 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard', 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container', +'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand', 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar', 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder', 'bathroom stall', 'shower wall', +'cup', 'jacket', 'storage bin', 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat', 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board', 'fireplace', 'soap dish', 'kitchen counter', 'doorframe', +'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball', 'hat', 'shower curtain rod', 'water cooler', 'paper cutter', 'tray', 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse', 'toilet seat cover dispenser', +'furniture', 'cart', 'storage container', 'scale', 'tissue box', 'light switch', 'crate', 'power outlet', 'decoration', 'sign', 'projector', 'closet door', 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal', 'headphones', 'dish rack', +'broom', 'guitar case', 'range hood', 'dustpan', 'hair dryer', 'water bottle', 'handicap bar', 'purse', 'vent', 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag', 'alarm clock', 'music stand', 'projector screen', 'divider', +'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity', 'closet wall', 'laundry hamper', 'bathroom stall door', 'ceiling light', 'trash bin', 'dumbbell', 'stair rail', 'tube', 'bathroom cabinet', 'cd case', 'closet rod', +'coffee kettle', 'structure', 'shower head', 'keyboard piano', 'case of water bottles', 'coat rack', 'storage organizer', 'folded chair', 'fire alarm', 'power strip', 'calendar', 'poster', 'potted plant', 'luggage', 'mattress') + +SCANNET_COLOR_MAP_200 = { +0: (0., 0., 0.), +1: (174., 199., 232.), +2: (188., 189., 34.), +3: (152., 223., 138.), +4: (255., 152., 150.), +5: (214., 39., 40.), +6: (91., 135., 229.), +7: (31., 119., 180.), +8: (229., 91., 104.), +9: (247., 182., 210.), +10: (91., 229., 110.), +11: (255., 187., 120.), +13: (141., 91., 229.), +14: (112., 128., 144.), +15: (196., 156., 148.), +16: (197., 176., 213.), +17: (44., 160., 44.), +18: (148., 103., 189.), +19: (229., 91., 223.), +21: (219., 219., 141.), +22: (192., 229., 91.), +23: (88., 218., 137.), +24: (58., 98., 137.), +26: (177., 82., 239.), +27: (255., 127., 14.), +28: (237., 204., 37.), +29: (41., 206., 32.), +31: (62., 143., 148.), +32: (34., 14., 130.), +33: (143., 45., 115.), +34: (137., 63., 14.), +35: (23., 190., 207.), +36: (16., 212., 139.), +38: (90., 119., 201.), +39: (125., 30., 141.), +40: (150., 53., 56.), +41: (186., 197., 62.), +42: (227., 119., 194.), +44: (38., 100., 128.), +45: (120., 31., 243.), +46: (154., 59., 103.), +47: (169., 137., 78.), +48: (143., 245., 111.), +49: (37., 230., 205.), +50: (14., 16., 155.), +51: (196., 51., 182.), +52: (237., 80., 38.), +54: (138., 
175., 62.), +55: (158., 218., 229.), +56: (38., 96., 167.), +57: (190., 77., 246.), +58: (208., 49., 84.), +59: (208., 193., 72.), +62: (55., 220., 57.), +63: (10., 125., 140.), +64: (76., 38., 202.), +65: (191., 28., 135.), +66: (211., 120., 42.), +67: (118., 174., 76.), +68: (17., 242., 171.), +69: (20., 65., 247.), +70: (208., 61., 222.), +71: (162., 62., 60.), +72: (210., 235., 62.), +73: (45., 152., 72.), +74: (35., 107., 149.), +75: (160., 89., 237.), +76: (227., 56., 125.), +77: (169., 143., 81.), +78: (42., 143., 20.), +79: (25., 160., 151.), +80: (82., 75., 227.), +82: (253., 59., 222.), +84: (240., 130., 89.), +86: (123., 172., 47.), +87: (71., 194., 133.), +88: (24., 94., 205.), +89: (134., 16., 179.), +90: (159., 32., 52.), +93: (213., 208., 88.), +95: (64., 158., 70.), +96: (18., 163., 194.), +97: (65., 29., 153.), +98: (177., 10., 109.), +99: (152., 83., 7.), +100: (83., 175., 30.), +101: (18., 199., 153.), +102: (61., 81., 208.), +103: (213., 85., 216.), +104: (170., 53., 42.), +105: (161., 192., 38.), +106: (23., 241., 91.), +107: (12., 103., 170.), +110: (151., 41., 245.), +112: (133., 51., 80.), +115: (184., 162., 91.), +116: (50., 138., 38.), +118: (31., 237., 236.), +120: (39., 19., 208.), +121: (223., 27., 180.), +122: (254., 141., 85.), +125: (97., 144., 39.), +128: (106., 231., 176.), +130: (12., 61., 162.), +131: (124., 66., 140.), +132: (137., 66., 73.), +134: (250., 253., 26.), +136: (55., 191., 73.), +138: (60., 126., 146.), +139: (153., 108., 234.), +140: (184., 58., 125.), +141: (135., 84., 14.), +145: (139., 248., 91.), +148: (53., 200., 172.), +154: (63., 69., 134.), +155: (190., 75., 186.), +156: (127., 63., 52.), +157: (141., 182., 25.), +159: (56., 144., 89.), +161: (64., 160., 250.), +163: (182., 86., 245.), +165: (139., 18., 53.), +166: (134., 120., 54.), +168: (49., 165., 42.), +169: (51., 128., 133.), +170: (44., 21., 163.), +177: (232., 93., 193.), +180: (176., 102., 54.), +185: (116., 217., 17.), +188: (54., 209., 150.), +191: (60., 99., 204.), +193: (129., 43., 144.), +195: (252., 100., 106.), +202: (187., 196., 73.), +208: (13., 158., 40.), +213: (52., 122., 152.), +214: (128., 76., 202.), +221: (187., 50., 115.), +229: (180., 141., 71.), +230: (77., 208., 35.), +232: (72., 183., 168.), +233: (97., 99., 203.), +242: (172., 22., 158.), +250: (155., 64., 40.), +261: (118., 159., 30.), +264: (69., 252., 148.), +276: (45., 103., 173.), +283: (111., 38., 149.), +286: (184., 9., 49.), +300: (188., 174., 67.), +304: (53., 206., 53.), +312: (97., 235., 252.), +323: (66., 32., 182.), +325: (236., 114., 195.), +331: (241., 154., 83.), +342: (133., 240., 52.), +356: (16., 205., 144.), +370: (75., 101., 198.), +392: (237., 95., 251.), +395: (191., 52., 49.), +399: (227., 254., 54.), +408: (49., 206., 87.), +417: (48., 113., 150.), +488: (125., 73., 182.), +540: (229., 32., 114.), +562: (158., 119., 28.), +570: (60., 205., 27.), +572: (18., 215., 201.), +581: (79., 76., 153.), +609: (134., 13., 116.), +748: (192., 97., 63.), +776: (108., 163., 18.), +1156: (95., 220., 156.), +1163: (98., 141., 208.), +1164: (144., 19., 193.), +1165: (166., 36., 57.), +1166: (212., 202., 34.), +1167: (23., 206., 34.), +1168: (91., 211., 236.), +1169: (79., 55., 137.), +1170: (182., 19., 117.), +1171: (134., 76., 14.), +1172: (87., 185., 28.), +1173: (82., 224., 187.), +1174: (92., 110., 214.), +1175: (168., 80., 171.), +1176: (197., 63., 51.), +1178: (175., 199., 77.), +1179: (62., 180., 98.), +1180: (8., 91., 150.), +1181: (77., 15., 130.), +1182: (154., 65., 96.), +1183: 
(197., 152., 11.), +1184: (59., 155., 45.), +1185: (12., 147., 145.), +1186: (54., 35., 219.), +1187: (210., 73., 181.), +1188: (221., 124., 77.), +1189: (149., 214., 66.), +1190: (72., 185., 134.), +1191: (42., 94., 198.), +} + +### For instance segmentation the non-object categories ### +VALID_PANOPTIC_IDS = (1, 3) + +CLASS_LABELS_PANOPTIC = ('wall', 'floor') \ No newline at end of file diff --git a/BenchmarkScripts/ScanNet200/scannet200_splits.py b/BenchmarkScripts/ScanNet200/scannet200_splits.py new file mode 100644 index 0000000..cf3d134 --- /dev/null +++ b/BenchmarkScripts/ScanNet200/scannet200_splits.py @@ -0,0 +1,18 @@ +### This file contains the HEAD - COMMON - TAIL split category ids for ScanNet 200 + +HEAD_CATS_SCANNET_200 = ['tv stand', 'curtain', 'blinds', 'shower curtain', 'bookshelf', 'tv', 'kitchen cabinet', 'pillow', 'lamp', 'dresser', 'monitor', 'object', 'ceiling', 'board', 'stove', 'closet wall', 'couch', 'office chair', 'kitchen counter', 'shower', 'closet', 'doorframe', 'sofa chair', 'mailbox', 'nightstand', 'washing machine', 'picture', 'book', 'sink', 'recycling bin', 'table', 'backpack', 'shower wall', 'toilet', 'copier', 'counter', 'stool', 'refrigerator', 'window', 'file cabinet', 'chair', 'wall', 'plant', 'coffee table', 'stairs', 'armchair', 'cabinet', 'bathroom vanity', 'bathroom stall', 'mirror', 'blackboard', 'trash can', 'stair rail', 'box', 'towel', 'door', 'clothes', 'whiteboard', 'bed', 'floor', 'bathtub', 'desk', 'wardrobe', 'clothes dryer', 'radiator', 'shelf'] +COMMON_CATS_SCANNET_200 = ["cushion", "end table", "dining table", "keyboard", "bag", "toilet paper", "printer", "blanket", "microwave", "shoe", "computer tower", "bottle", "bin", "ottoman", "bench", "basket", "fan", "laptop", "person", "paper towel dispenser", "oven", "rack", "piano", "suitcase", "rail", "container", "telephone", "stand", "light", "laundry basket", "pipe", "seat", "column", "bicycle", "ladder", "jacket", "storage bin", "coffee maker", "dishwasher", "machine", "mat", "windowsill", "bulletin board", "fireplace", "mini fridge", "water cooler", "shower door", "pillar", "ledge", "furniture", "cart", "decoration", "closet door", "vacuum cleaner", "dish rack", "range hood", "projector screen", "divider", "bathroom counter", "laundry hamper", "bathroom stall door", "ceiling light", "trash bin", "bathroom cabinet", "structure", "storage organizer", "potted plant", "mattress"] +TAIL_CATS_SCANNET_200 = ["paper", "plate", "soap dispenser", "bucket", "clock", "guitar", "toilet paper holder", "speaker", "cup", "paper towel roll", "bar", "toaster", "ironing board", "soap dish", "toilet paper dispenser", "fire extinguisher", "ball", "hat", "shower curtain rod", "paper cutter", "tray", "toaster oven", "mouse", "toilet seat cover dispenser", "storage container", "scale", "tissue box", "light switch", "crate", "power outlet", "sign", "projector", "candle", "plunger", "stuffed animal", "headphones", "broom", "guitar case", "dustpan", "hair dryer", "water bottle", "handicap bar", "purse", "vent", "shower floor", "water pitcher", "bowl", "paper bag", "alarm clock", "music stand", "laundry detergent", "dumbbell", "tube", "cd case", "closet rod", "coffee kettle", "shower head", "keyboard piano", "case of water bottles", "coat rack", "folded chair", "fire alarm", "power strip", "calendar", "poster", "luggage"] + + +### Given the different size of the official train and val sets, not all ScanNet200 categories are present in the validation set. 
+### Here we list the categories (labels and ids) that are present in both the train and the validation set, followed by the remaining categories that are present in train but not in val.
+### We do not evaluate on the unseen validation categories in this benchmark.
+
+CLASS_LABELS_200_VALIDATION = (
+'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf', 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window', 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair', 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet', 'towel', 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion', 'plant', 'ceiling', 'bathtub', 'end table',
+'dining table', 'keyboard', 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand', 'whiteboard', 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave', 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench', 'board', 'washing machine', 'mirror', 'copier', 'basket', 'sofa chair', 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person', 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate',
+'blackboard', 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container', 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand', 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar', 'toilet paper holder', 'seat', 'speaker', 'column', 'ladder', 'bathroom stall', 'shower wall', 'cup', 'jacket', 'storage bin', 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat', 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board',
+'fireplace', 'soap dish', 'kitchen counter', 'doorframe', 'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball', 'hat', 'shower curtain rod', 'water cooler', 'paper cutter', 'tray', 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse', 'toilet seat cover dispenser', 'furniture', 'cart', 'scale', 'tissue box', 'light switch', 'crate', 'power outlet', 'decoration', 'sign', 'projector', 'closet door', 'vacuum cleaner', 'plunger', 'stuffed animal', 'headphones', 'dish rack', 'broom', 'range hood', 'dustpan',
+'hair dryer', 'water bottle', 'handicap bar', 'vent', 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag', 'projector screen', 'divider', 'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity', 'closet wall', 'laundry hamper', 'bathroom stall door', 'ceiling light', 'trash bin', 'dumbbell', 'stair rail', 'tube', 'bathroom cabinet', 'closet rod', 'coffee kettle', 'shower head', 'keyboard piano', 'case of water bottles', 'coat rack', 'folded chair', 'fire alarm', 'power strip', 'calendar', 'poster', 'potted plant', 'mattress')
+
+VALID_CLASS_IDS_200_VALIDATION = (
+1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 112, 115, 116, 118, 120, 122, 125,
+128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 145, 148, 154, 155, 156, 157, 159, 161, 163, 165, 166, 168, 169, 170, 177, 180, 185, 188, 191, 193, 195, 202, 208, 213, 214, 229, 230, 232, 233, 242, 250, 261, 264, 276, 283, 300, 304, 312, 323, 325, 342, 356, 370, 392, 395, 408, 417, 488, 540, 562, 570, 609, 748, 776, 1156, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1175, 1176, 1179, 1180, 1181, 1182, 1184, 1185, 1186, 1187, 1188, 1189, 1191)
+
+CLASS_LABELS_200_TRAIN_ONLY = ('bicycle', 'storage container', 'candle', 'guitar case', 'purse', 'alarm clock', 'music stand', 'cd case', 'structure', 'storage organizer', 'luggage')
+
+VALID_CLASS_IDS_200_TRAIN_ONLY = (121, 221, 286, 331, 399, 572, 581, 1174, 1178, 1183, 1190)
diff --git a/BenchmarkScripts/ScanNet200/utils.py b/BenchmarkScripts/ScanNet200/utils.py
new file mode 100644
index 0000000..0c6b1f8
--- /dev/null
+++ b/BenchmarkScripts/ScanNet200/utils.py
@@ -0,0 +1,115 @@
+import os
+import numpy as np
+from plyfile import PlyData, PlyElement
+import pandas as pd
+
+from scannet200_constants import *
+
+def read_plymesh(filepath):
+    """Read a ply file and return its vertices and faces as numpy arrays. Returns None if empty."""
+    with open(filepath, 'rb') as f:
+        plydata = PlyData.read(f)
+    if plydata.elements:
+        vertices = pd.DataFrame(plydata['vertex'].data).values
+        faces = np.array([f[0] for f in plydata["face"].data])
+        return vertices, faces
+
+def save_plymesh(vertices, faces, filename, verbose=True, with_label=True):
+    """Save an RGB point cloud as a PLY file.
+
+    Args:
+        vertices: Nx6 matrix where vertices[:, :3] are the XYZ coordinates and vertices[:, 3:6]
+            are the RGB values; with labels, Nx7 (+label) or Nx8 (+label, instance_id).
+            If an Nx3 matrix is given, all points are saved with [128, 128, 128] (gray) color.
+    """
+    assert vertices.ndim == 2
+    if with_label:
+        if vertices.shape[1] == 7:
+            python_types = (float, float, float, int, int, int, int)
+            npy_types = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'), ('green', 'u1'),
+                         ('blue', 'u1'), ('label', 'u4')]
+        elif vertices.shape[1] == 8:
+            python_types = (float, float, float, int, int, int, int, int)
+            npy_types = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'), ('green', 'u1'),
+                         ('blue', 'u1'), ('label', 'u4'), ('instance_id', 'u4')]
+        else:
+            raise ValueError(f'Unsupported labeled vertex dimension: {vertices.shape[1]}')
+    else:
+        if vertices.shape[1] == 3:
+            # No color given, pad all points with gray
+            gray_concat = np.tile(np.array([128], dtype=np.uint8), (vertices.shape[0], 3))
+            vertices = np.hstack((vertices, gray_concat))
+        if vertices.shape[1] == 6:
+            python_types = (float, float, float, int, int, int)
+            npy_types = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'), ('green', 'u1'),
+                         ('blue', 'u1')]
+        else:
+            raise ValueError(f'Unsupported vertex dimension: {vertices.shape[1]}')
+
+    vertices_list = []
+    for row_idx in range(vertices.shape[0]):
+        cur_point = vertices[row_idx]
+        vertices_list.append(tuple(dtype(point) for dtype, point in zip(python_types, cur_point)))
+    vertices_array = np.array(vertices_list, dtype=npy_types)
+    elements = [PlyElement.describe(vertices_array, 'vertex')]
+
+    if faces is not None:
+        faces_array = np.empty(len(faces), dtype=[('vertex_indices', 'i4', (3,))])
+        faces_array['vertex_indices'] = faces
+        elements += [PlyElement.describe(faces_array, 'face')]
+
+    # Write
+    PlyData(elements).write(filename)
+
+    if verbose:
+        print('Saved point cloud to: %s' % filename)
+
+
+# Map a segment group's raw category to its label id and the corresponding point indices
+def point_indices_from_group(points, seg_indices, group, labels_pd, CLASS_IDs):
+    group_segments = np.array(group['segments'])
+    label = group['label']
+
+    # Map the category name to its id
+    label_ids = labels_pd[labels_pd['raw_category'] == label]['id']
+    label_id = int(label_ids.iloc[0]) if len(label_ids) > 0 else 0
+
+    # Only store valid categories; everything else is mapped to 0 (unlabeled)
+    if label_id not in CLASS_IDs:
+        label_id = 0
+
+    # Get the points whose segment indices are in the group's segment list
+    point_IDs = np.where(np.isin(seg_indices, group_segments))
+
+    return points[point_IDs], point_IDs[0], label_id
+
+
+# Uncomment if mesh voxelization is required
+# import
trimesh +# from trimesh.voxel import creation +# from sklearn.neighbors import KDTree +# import MinkowskiEngine as ME + + +# VOXELIZE the scene from sampling on the mesh directly instead of vertices +def voxelize_pointcloud(points, colors, labels, instances, faces, voxel_size=0.2): + + # voxelize mesh first and determine closest labels with KDTree search + trimesh_scene_mesh = trimesh.Trimesh(vertices=points, faces=faces) + voxel_grid = creation.voxelize(trimesh_scene_mesh, voxel_size) + voxel_cloud = np.asarray(voxel_grid.points) + orig_tree = KDTree(points, leaf_size=8) + _, voxel_pc_matches = orig_tree.query(voxel_cloud, k=1) + voxel_pc_matches = voxel_pc_matches.flatten() + + # match colors to voxel ids + points = points[voxel_pc_matches] / voxel_size + colors = colors[voxel_pc_matches] + labels = labels[voxel_pc_matches] + instances = instances[voxel_pc_matches] + + # Voxelize scene + quantized_scene, scene_inds = ME.utils.sparse_quantize(points, return_index=True) + quantized_scene_colors = colors[scene_inds] + quantized_labels = labels[scene_inds] + quantized_instances = instances[scene_inds] + + return quantized_scene, quantized_scene_colors, quantized_labels, quantized_instances diff --git a/BenchmarkScripts/convert2panoptic.py b/BenchmarkScripts/convert2panoptic.py new file mode 100644 index 0000000..d1c919f --- /dev/null +++ b/BenchmarkScripts/convert2panoptic.py @@ -0,0 +1,170 @@ +#!/usr/bin/python +# +# Convert to COCO-style panoptic segmentation format (http://cocodataset.org/#format-data). +# + +# python imports +from __future__ import print_function, absolute_import, division, unicode_literals +import os +import glob +import sys +import argparse +import json +import numpy as np + +# Image processing +from PIL import Image + +EVAL_LABELS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39] +EVAL_LABEL_NAMES = ["wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door", "window", "bookshelf", "picture", "counter", "desk", "curtain", "refrigerator", "shower curtain", "toilet", "sink", "bathtub", "otherfurniture"] +EVAL_LABEL_CATS = ["indoor", "indoor", "furniture", "furniture", "furniture", "furniture", "furniture", "furniture", "furniture", "furniture", "furniture", "furniture", "furniture", "furniture", "appliance", "furniture", "furniture", "appliance", "furniture", "furniture"] +EVAL_LABEL_COLORS = [(174, 199, 232), (152, 223, 138), (31, 119, 180), (255, 187, 120), (188, 189, 34), (140, 86, 75), (255, 152, 150), (214, 39, 40), (197, 176, 213), (148, 103, 189), (196, 156, 148), (23, 190, 207), (247, 182, 210), (219, 219, 141), (255, 127, 14), (158, 218, 229), (44, 160, 44), (112, 128, 144), (227, 119, 194), (82, 84, 163)] + +def splitall(path): + allparts = [] + while 1: + parts = os.path.split(path) + if parts[0] == path: # sentinel for absolute paths + allparts.insert(0, parts[0]) + break + elif parts[1] == path: # sentinel for relative paths + allparts.insert(0, parts[1]) + break + else: + path = parts[0] + allparts.insert(0, parts[1]) + return allparts + +# The main method +def convert2panoptic(scannetPath, outputFolder=None): + + if outputFolder is None: + outputFolder = scannetPath + + # find files + search = os.path.join(scannetPath, "*", "instance", "*.png") + files = glob.glob(search) + files.sort() + # quit if we did not find anything + if not files: + print( + "Did not find any files for using matching pattern {}. 
Please consult the README.".format(search) + ) + sys.exit(-1) + # a bit verbose + print("Converting {} annotation files.".format(len(files))) + + outputBaseFile = "scannet_panoptic" + outFile = os.path.join(outputFolder, "{}.json".format(outputBaseFile)) + print("Json file with the annotations in panoptic format will be saved in {}".format(outFile)) + panopticFolder = os.path.join(outputFolder, outputBaseFile) + if not os.path.isdir(panopticFolder): + print("Creating folder {} for panoptic segmentation PNGs".format(panopticFolder)) + os.mkdir(panopticFolder) + print("Corresponding segmentations in .png format will be saved in {}".format(panopticFolder)) + + categories = [] + for idx in range(len(EVAL_LABELS)): + label = EVAL_LABELS[idx] + name = EVAL_LABEL_NAMES[idx] + cat = EVAL_LABEL_CATS[idx] + color = EVAL_LABEL_COLORS[idx] + isthing = label > 2 + categories.append({'id': int(label), + 'name': name, + 'color': color, + 'supercategory': cat, + 'isthing': isthing}) + + images = [] + annotations = [] + for progress, f in enumerate(files): + + originalFormat = np.array(Image.open(f)) + + parts = splitall(f) + fileName = parts[-1] + sceneName = parts[-3] + outputFileName = "{}__{}".format(sceneName, fileName) + inputFileName = os.path.join(sceneName, "color", fileName) + imageId = os.path.splitext(outputFileName)[0] + # image entry, id for image is its filename without extension + images.append({"id": imageId, + "width": int(originalFormat.shape[1]), + "height": int(originalFormat.shape[0]), + "file_name": inputFileName}) + + pan_format = np.zeros( + (originalFormat.shape[0], originalFormat.shape[1], 3), dtype=np.uint8 + ) + segmentIds = np.unique(originalFormat) + segmInfo = [] + for segmentId in segmentIds: + isCrowd = 0 + if segmentId < 1000: + semanticId = segmentId + else: + semanticId = segmentId // 1000 + if semanticId not in EVAL_LABELS: + continue + + mask = originalFormat == segmentId + color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256] + pan_format[mask] = color + + area = np.sum(mask) # segment area computation + + # bbox computation for a segment + hor = np.sum(mask, axis=0) + hor_idx = np.nonzero(hor)[0] + x = hor_idx[0] + width = hor_idx[-1] - x + 1 + vert = np.sum(mask, axis=1) + vert_idx = np.nonzero(vert)[0] + y = vert_idx[0] + height = vert_idx[-1] - y + 1 + bbox = [int(x), int(y), int(width), int(height)] + + segmInfo.append({"id": int(segmentId), + "category_id": int(semanticId), + "area": int(area), + "bbox": bbox, + "iscrowd": isCrowd}) + + annotations.append({'image_id': imageId, + 'file_name': outputFileName, + "segments_info": segmInfo}) + + Image.fromarray(pan_format).save(os.path.join(panopticFolder, outputFileName)) + + print("\rProgress: {:>3.2f} %".format((progress + 1) * 100 / len(files)), end=' ') + sys.stdout.flush() + + print("\nSaving the json file {}".format(outFile)) + d = {'images': images, + 'annotations': annotations, + 'categories': categories} + with open(outFile, 'w') as f: + json.dump(d, f, sort_keys=True, indent=4) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--dataset-folder", + dest="scannetPath", + help="path to the ScanNet data 'scannet_frames_25k' folder", + required=True, + type=str) + parser.add_argument("--output-folder", + dest="outputFolder", + help="path to the output folder.", + default=None, + type=str) + args = parser.parse_args() + + convert2panoptic(args.scannetPath, args.outputFolder) + + +# call the main +if __name__ == "__main__": + main() diff --git a/README.md b/README.md 
index b9644e1..17eed44 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,9 @@ ScanNet is an RGB-D video dataset containing 2.5 million views in more than 1500
 ## ScanNet Data
 
-If you would like to download the ScanNet data, please fill out an agreement to the [ScanNet Terms of Use](http://kaldir.vc.in.tum.de/scannet/ScanNet_TOS.pdf) and send it to us at scannet@googlegroups.com.
+If you would like to download the ScanNet data, fill out an agreement to the [ScanNet Terms of Use](http://kaldir.vc.cit.tum.de/scannet/ScanNet_TOS.pdf), using your institutional email address, and send it to us at scannet@googlegroups.com.
 
-If you have not received a response within a week, it is likely that your email is bouncing - please check this before sending repeat requests.
+If you have not received a response within a week, it is likely that your email is bouncing - please check this before sending repeat requests. Please do not reply to the noreply email - your email won't be seen.
 
 Please check the [changelog](http://www.scan-net.org/changelog) for updates to the data release.
 
@@ -28,7 +28,7 @@ The data in ScanNet is organized by RGB-D sequence. Each sequence is stored unde
 |-- _vh_clean_2.0.010000.segs.json, _vh_clean.segs.json
        Over-segmentation of lo-res, hi-res meshes, respectively (referenced by aggregated semantic annotations)
 |-- _vh_clean_2.labels.ply
-       Visualization of aggregated semantic segmentation; colored by nyu40 labels (see img/legend; ply property 'label' denotes the ScanNet label id)
+       Visualization of aggregated semantic segmentation; colored by nyu40 labels (see img/legend; ply property 'label' denotes the nyu40 label id)
 |-- _2d-label.zip
        Raw 2d projections of aggregated annotation labels as 16-bit pngs with ScanNet label ids
 |-- _2d-instance.zip
diff --git a/SensReader/c++/README.txt b/SensReader/c++/README.txt
index 246287c..906db83 100644
--- a/SensReader/c++/README.txt
+++ b/SensReader/c++/README.txt
@@ -11,6 +11,8 @@ Run:
 
 Hint: keep the sens files as they are a nice represention
 see processFrame(..) to decode independent frames
+
+For additional functionality (vector/matrix/point cloud classes, etc.), include the mLib library: https://github.com/niessner/mLib.
 - tested under Windows10 VS2013
 - tested 14.04.1-Ubuntu: g++ and clang
diff --git a/SensReader/python/README.md b/SensReader/python/README.md
index 7ab28cd..60da5a8 100644
--- a/SensReader/python/README.md
+++ b/SensReader/python/README.md
@@ -1,5 +1,7 @@
 # Data Exporter
 
+Developed and tested with Python 2.7.
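+The exporter imports `numpy`, `imageio`, `cv2` (opencv-python), and `png` (pypng), so these packages should be installed beforehand.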
+
 Usage:
 ```
 python reader.py --filename [.sens file to export data from] --output_path [output directory to export data to]
diff --git a/SensReader/python/SensorData.py b/SensReader/python/SensorData.py
index 9a67d1c..16215ca 100644
--- a/SensReader/python/SensorData.py
+++ b/SensReader/python/SensorData.py
@@ -4,6 +4,7 @@ import zlib
 import imageio
 import cv2
+import png
 COMPRESSION_TYPE_COLOR = {-1:'unknown', 0:'raw', 1:'png', 2:'jpeg'}
 COMPRESSION_TYPE_DEPTH = {-1:'unknown', 0:'raw_ushort', 1:'zlib_ushort', 2:'occi_ushort'}
@@ -83,8 +84,11 @@ def export_depth_images(self, output_path, image_size=None, frame_skip=1):
       depth = np.fromstring(depth_data, dtype=np.uint16).reshape(self.depth_height, self.depth_width)
       if image_size is not None:
         depth = cv2.resize(depth, (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST)
-      imageio.imwrite(os.path.join(output_path, str(f) + '.png'), depth)
-
+      #imageio.imwrite(os.path.join(output_path, str(f) + '.png'), depth)
+      with open(os.path.join(output_path, str(f) + '.png'), 'wb') as fout:  # write 16-bit png; fout avoids shadowing the frame index f
+        writer = png.Writer(width=depth.shape[1], height=depth.shape[0], bitdepth=16)
+        depth = depth.reshape(-1, depth.shape[1]).tolist()
+        writer.write(fout, depth)
 def export_color_images(self, output_path, image_size=None, frame_skip=1):
     if not os.path.exists(output_path):
diff --git a/img/legend.png b/img/legend.png
index d14e19c..0354c93 100644
Binary files a/img/legend.png and b/img/legend.png differ