Skip to content

domainnet

DomainNet

Bases: BaseDataset

A large dataset introduced in "Moment Matching for Multi-Source Domain Adaptation". It consists of 345 classes across 6 domains: clipart, infograph, painting, quickdraw, real, and sketch.

Source code in pytorch_adapt\datasets\domainnet.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class DomainNet(BaseDataset):
    """
    The dataset from "Moment Matching for Multi-Source Domain Adaptation".
    It has 345 classes spread over 6 domains:
    clipart, infograph, painting, quickdraw, real, sketch
    """

    def __init__(self, root: str, domain: str, train: bool, transform, **kwargs):
        """
        Arguments:
            root: The dataset must be located at ```<root>/domainnet```
            domain: One of the 6 domains
            train: Whether or not to use the training set.
                A ```TypeError``` is raised if this is not a ```bool```.
            transform: The image transform applied to each sample.
        """
        super().__init__(domain=domain, **kwargs)
        if isinstance(train, bool):
            name = "train" if train else "test"
        else:
            raise TypeError("train should be True or False")

        img_dir = os.path.join(root, "domainnet")
        labels_file = os.path.join(root, "domainnet", f"{domain}_{name}.txt")

        # Every line is split on single spaces: field 0 is the image path
        # (joined onto img_dir below), field 1 is the integer class label.
        rows = []
        with open(labels_file) as handle:
            for raw_line in handle:
                rows.append(raw_line.rstrip().split(" "))

        self.img_paths = [os.path.join(img_dir, row[0]) for row in rows]
        check_img_paths(img_dir, self.img_paths, domain)

        # Expected sample count for each (domain, split) pair.
        # NOTE(review): check_length presumably compares this against
        # len(self) -- confirm in the datasets utils module.
        expected_sizes = {
            "clipart": {"train": 33525, "test": 14604},
            "infograph": {"train": 36023, "test": 15582},
            "painting": {"train": 50416, "test": 21850},
            "quickdraw": {"train": 120750, "test": 51750},
            "real": {"train": 120906, "test": 52041},
            "sketch": {"train": 48212, "test": 20916},
        }
        check_length(self, expected_sizes[domain][name])

        self.labels = [int(row[1]) for row in rows]
        self.transform = transform

__init__(root, domain, train, transform, **kwargs)

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| root | str | The dataset must be located at `<root>/domainnet` | required |
| domain | str | One of the 6 domains | required |
| train | bool | Whether or not to use the training set. | required |
| transform | | The image transform applied to each sample. | required |
Source code in pytorch_adapt\datasets\domainnet.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(self, root: str, domain: str, train: bool, transform, **kwargs):
    """
    Arguments:
        root: The dataset must be located at ```<root>/domainnet```
        domain: One of the 6 domains
        train: Whether or not to use the training set.
            A ```TypeError``` is raised if this is not a ```bool```.
        transform: The image transform applied to each sample.
    """
    super().__init__(domain=domain, **kwargs)
    if isinstance(train, bool):
        name = "train" if train else "test"
    else:
        raise TypeError("train should be True or False")

    img_dir = os.path.join(root, "domainnet")
    labels_file = os.path.join(root, "domainnet", f"{domain}_{name}.txt")

    # Every line is split on single spaces: field 0 is the image path
    # (joined onto img_dir below), field 1 is the integer class label.
    rows = []
    with open(labels_file) as handle:
        for raw_line in handle:
            rows.append(raw_line.rstrip().split(" "))

    self.img_paths = [os.path.join(img_dir, row[0]) for row in rows]
    check_img_paths(img_dir, self.img_paths, domain)

    # Expected sample count for each (domain, split) pair.
    # NOTE(review): check_length presumably compares this against
    # len(self) -- confirm in the datasets utils module.
    expected_sizes = {
        "clipart": {"train": 33525, "test": 14604},
        "infograph": {"train": 36023, "test": 15582},
        "painting": {"train": 50416, "test": 21850},
        "quickdraw": {"train": 120750, "test": 51750},
        "real": {"train": 120906, "test": 52041},
        "sketch": {"train": 48212, "test": 20916},
    }
    check_length(self, expected_sizes[domain][name])

    self.labels = [int(row[1]) for row in rows]
    self.transform = transform

DomainNet126

Bases: BaseDownloadableDataset

A custom train/test split of DomainNet126Full.

Source code in pytorch_adapt\datasets\domainnet.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
class DomainNet126(BaseDownloadableDataset):
    """
    DomainNet126Full divided into a custom train/test split.
    """

    url = "https://cornell.box.com/shared/static/5uu0v3rs9heusbiht2nn1gbn4yfspas6"
    filename = "domainnet126.tar.gz"
    md5 = "50f29fa0152d715c036c813ad67502d6"

    def __init__(self, root: str, domain: str, train: bool, transform=None, **kwargs):
        """
        Arguments:
            root: The dataset must be located at ```<root>/domainnet```
            domain: One of the 4 domains
            train: Whether or not to use the training set.
            transform: The image transform applied to each sample.
        """
        # self.train is assigned before the parent constructor runs.
        # NOTE(review): presumably the parent calls set_paths_and_labels,
        # which reads self.train -- confirm in BaseDownloadableDataset.
        self.train = check_train(train)
        super().__init__(root=root, domain=domain, **kwargs)
        self.transform = transform

    def set_paths_and_labels(self, root):
        """
        Populates ```self.img_paths``` and ```self.labels``` from
        ```<root>/domainnet/<domain>126_<train|test>.txt```.

        Arguments:
            root: The folder that contains the ```domainnet``` folder.
        """
        name = "train" if self.train else "test"
        img_dir = os.path.join(root, "domainnet")
        labels_file = os.path.join(root, "domainnet", f"{self.domain}126_{name}.txt")

        # Every line is split on single spaces: field 0 is the image path
        # (joined onto img_dir below), field 1 is the integer class label.
        rows = []
        with open(labels_file) as handle:
            for raw_line in handle:
                rows.append(raw_line.rstrip().split(" "))

        self.img_paths = [os.path.join(img_dir, row[0]) for row in rows]
        check_img_paths(img_dir, self.img_paths, self.domain)

        # Expected sample count for each (domain, split) pair.
        expected_sizes = {
            "clipart": {"train": 14962, "test": 3741},
            "painting": {"train": 25201, "test": 6301},
            "real": {"train": 56286, "test": 14072},
            "sketch": {"train": 19665, "test": 4917},
        }
        check_length(self, expected_sizes[self.domain][name])

        self.labels = [int(row[1]) for row in rows]

__init__(root, domain, train, transform=None, **kwargs)

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| root | str | The dataset must be located at `<root>/domainnet` | required |
| domain | str | One of the 4 domains | required |
| train | bool | Whether or not to use the training set. | required |
| transform | | The image transform applied to each sample. | None |
Source code in pytorch_adapt\datasets\domainnet.py
 96
 97
 98
 99
100
101
102
103
104
105
106
def __init__(self, root: str, domain: str, train: bool, transform=None, **kwargs):
    """
    Arguments:
        root: The dataset must be located at ```<root>/domainnet```
        domain: One of the 4 domains
        train: Whether or not to use the training set.
            The value is passed through ```check_train``` and stored
            as ```self.train```.
        transform: The image transform applied to each sample.
        **kwargs: Forwarded to the parent class constructor.
    """
    # self.train is assigned before the parent constructor runs.
    # NOTE(review): presumably the parent constructor needs self.train
    # (e.g. to pick the train/test labels file) -- confirm in the base class.
    self.train = check_train(train)
    super().__init__(root=root, domain=domain, **kwargs)
    self.transform = transform

DomainNet126Full

Bases: BaseDataset

A subset of DomainNet consisting of 126 classes and 4 domains: clipart, painting, real, and sketch.

Source code in pytorch_adapt\datasets\domainnet.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
class DomainNet126Full(BaseDataset):
    """
    The 126-class, 4-domain subset of DomainNet.
    The domains are: clipart, painting, real, sketch
    """

    def __init__(self, root: str, domain: str, transform, **kwargs):
        """
        Arguments:
            root: The dataset must be located at ```<root>/domainnet```
            domain: One of the 4 domains
            transform: The image transform applied to each sample.
        """
        super().__init__(domain=domain, **kwargs)

        # The six label files for this domain are merged into one dataset.
        stems = (
            f"labeled_source_images_{domain}",
            f"labeled_target_images_{domain}_1",
            f"labeled_target_images_{domain}_3",
            f"unlabeled_target_images_{domain}_1",
            f"unlabeled_target_images_{domain}_3",
            f"validation_target_images_{domain}_3",
        )
        filenames = [os.path.join(root, "domainnet", f"{f}.txt") for f in stems]
        img_dir = os.path.join(root, "domainnet")

        # Keyed by image path, so a path listed in several files is kept once:
        # at the position of its first occurrence, with the label of its last.
        label_of = OrderedDict()
        for labels_file in filenames:
            with open(labels_file) as handle:
                for raw_line in handle:
                    rel, label = raw_line.rstrip().split(" ")
                    label_of[rel] = label

        self.img_paths = [os.path.join(img_dir, rel) for rel in label_of]
        check_img_paths(img_dir, self.img_paths, domain)
        self.labels = list(map(int, label_of.values()))
        self.transform = transform

__init__(root, domain, transform, **kwargs)

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| root | str | The dataset must be located at `<root>/domainnet` | required |
| domain | str | One of the 4 domains | required |
| transform | | The image transform applied to each sample. | required |
Source code in pytorch_adapt\datasets\domainnet.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def __init__(self, root: str, domain: str, transform, **kwargs):
    """
    Arguments:
        root: The dataset must be located at ```<root>/domainnet```
        domain: One of the 4 domains
        transform: The image transform applied to each sample.
    """
    super().__init__(domain=domain, **kwargs)

    # The six label files for this domain are merged into one dataset.
    stems = (
        f"labeled_source_images_{domain}",
        f"labeled_target_images_{domain}_1",
        f"labeled_target_images_{domain}_3",
        f"unlabeled_target_images_{domain}_1",
        f"unlabeled_target_images_{domain}_3",
        f"validation_target_images_{domain}_3",
    )
    filenames = [os.path.join(root, "domainnet", f"{f}.txt") for f in stems]
    img_dir = os.path.join(root, "domainnet")

    # Keyed by image path, so a path listed in several files is kept once:
    # at the position of its first occurrence, with the label of its last.
    label_of = OrderedDict()
    for labels_file in filenames:
        with open(labels_file) as handle:
            for raw_line in handle:
                rel, label = raw_line.rstrip().split(" ")
                label_of[rel] = label

    self.img_paths = [os.path.join(img_dir, rel) for rel in label_of]
    check_img_paths(img_dir, self.img_paths, domain)
    self.labels = list(map(int, label_of.values()))
    self.transform = transform