Skip to content

Base

BaseDerivativeGenerator

A factory class for generating derivative files in a given format.

Source code in ckanext/versioned_datastore/lib/downloads/derivatives/base.py
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
class BaseDerivativeGenerator(metaclass=ABCMeta):
    """
    A factory class for generating derivative files in a given format.

    Instances are context managers. Entering opens every path in ``file_paths`` in
    append mode, then runs ``setup()`` and (first time only) ``initialise()``;
    exiting runs ``finalise()`` and closes the open handles. Subclasses must
    implement ``_write()``.
    """

    # short identifier for the format; used as the file extension if `extension` is unset
    name = 'base'
    # file extension for the output file (without the separator); falls back to `name`
    extension = None

    # name of the extra field appended to the field list when a resource ID is given
    RESOURCE_ID_FIELD_NAME = 'Source resource ID'

    def __init__(self, output_dir, fields, query, resource_id=None, **format_args):
        """
        :param output_dir: directory the output file path is built in
        :param fields: list of field names; extended with RESOURCE_ID_FIELD_NAME when
                       a resource ID is given
        :param query: the original query, stored for derivatives that need it
        :param resource_id: optional resource ID, used as the output file's base name
        :param format_args: any extra keyword arguments, stored as-is in format_args
        """
        self.output_dir = output_dir
        self.output_name = os.extsep.join(
            [resource_id or 'resource', self.extension or self.name]
        )

        if resource_id:
            self.all_fields = fields + [self.RESOURCE_ID_FIELD_NAME]
        else:
            self.all_fields = fields
        # split the fields by file; there should not be any keys here that are not in
        # self.file_paths (though this only has to contain keys for files with fields,
        # e.g. not manifest files or similar)
        self.fields = {'main': self.all_fields}

        # some derivatives might need access to the original query
        self._query = query

        self.resource_id = resource_id

        # holds any extra args not captured by the inherited __init__()
        self.format_args = format_args

        # a derivative may have multiple component files, but most will just have the one (the
        # output file). None of them _have_ to be the output filename.
        self.file_paths = {'main': os.path.join(self.output_dir, self.output_name)}
        # this will contain open file handles
        self.files = {}
        # indicators
        self._initialised = False
        self._opened = False
        self._validated = False

    def __enter__(self):
        """
        Opens all the files in file_paths (append mode), then runs setup() and, if it
        has not run yet, initialise().

        :returns: this generator
        """
        try:
            for fn, fp in self.file_paths.items():
                self.files[fn] = open(fp, 'a')
            self._opened = True
            self.setup()
            if not self._initialised:
                self.initialise()
        except BaseException:
            # __exit__ is not called when __enter__ raises, so any handles opened so
            # far have to be released here or they would leak
            self._close_files()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Runs finalise() and then closes all open files.

        The files are closed even if finalise() raises; exceptions are not suppressed.
        """
        try:
            self.finalise()
        finally:
            self._close_files()

    def _close_files(self):
        """
        Closes every open handle and resets the open-file state.
        """
        for f in self.files.values():
            try:
                f.close()
            except AttributeError:
                # tolerate entries that have no close() method
                pass
        self.files = {}
        self._opened = False

    @property
    def main_file(self):
        """
        The open handle stored under 'main', or the only open handle if there is
        exactly one; None otherwise (including when no files are open).
        """
        if 'main' in self.files:
            return self.files['main']
        if len(self.files) == 1:
            return next(iter(self.files.values()))
        return None

    def initialise(self):
        """
        Runs after files have opened, before any records are processed.

        Only runs the first time the files are opened; in a multi-resource generator,
        files may be opened multiple times, but this will only be run once. Use setup()
        for things that need to be run every time.
        """
        self._initialised = True

    def setup(self):
        """
        Runs every time files are opened, before any records are processed.

        Runs before initialise().
        """
        pass

    def validate(self, record):
        """
        Runs when the first record is processed.
        """
        self._validated = True

    def write(self, record):
        """
        Writes a single record, running validate() on it first if no record has been
        validated yet.

        :param record: the record to write; passed through to _write()
        """
        if not self._validated:
            self.validate(record)
        self._write(record)

    def finalise(self):
        """
        Runs when files close.
        """
        pass

    def cleanup(self):
        """
        Runs when the generator has finished, i.e. after a single resource in
        separate-resources requests and after all resources in combined requests.
        """
        pass

    @abstractmethod
    def _write(self, record):
        """
        Writes a single record in the derivative's format. Must be implemented by
        subclasses.

        :param record: the record to write
        :raises NotImplementedError: always, in this base implementation
        """
        # NotImplemented is a comparison sentinel, not an exception; raising it would
        # produce a TypeError instead of the intended error
        raise NotImplementedError

cleanup()

Runs when the generator has finished, i.e. after a single resource in separate-resources requests and after all resources in combined requests.

Source code in ckanext/versioned_datastore/lib/downloads/derivatives/base.py
111
112
113
114
115
116
def cleanup(self):
    """
    Hook intended to run once the generator has finished: after a single resource
    in separate-resources requests, and after all resources in combined requests.

    This implementation does nothing.
    """

finalise()

Runs when files close.

Source code in ckanext/versioned_datastore/lib/downloads/derivatives/base.py
105
106
107
108
109
def finalise(self):
    """
    Hook intended to run when the files close.

    This implementation does nothing.
    """

initialise()

Runs after files have opened, before any records are processed.

Only runs the first time the files are opened; in a multi-resource generator, files may be opened multiple times, but this will only be run once. Use setup() for things that need to be run every time.

Source code in ckanext/versioned_datastore/lib/downloads/derivatives/base.py
76
77
78
79
80
81
82
83
84
def initialise(self):
    """
    Runs after files have opened, before any records are processed.

    Only runs the first time the files are opened; in a multi-resource generator,
    files may be opened multiple times, but this will only be run once. Use setup()
    for things that need to be run every time.

    This implementation only sets the ``_initialised`` flag to True.
    """
    self._initialised = True

setup()

Runs every time files are opened, before any records are processed.

Runs before initialise().

Source code in ckanext/versioned_datastore/lib/downloads/derivatives/base.py
86
87
88
89
90
91
92
def setup(self):
    """
    Runs every time files are opened, before any records are processed.

    Runs before initialise().
    """
    pass

validate(record)

Runs when the first record is processed.

Source code in ckanext/versioned_datastore/lib/downloads/derivatives/base.py
94
95
96
97
98
def validate(self, record):
    """
    Runs when the first record is processed.

    This implementation does not inspect ``record``; it only sets the
    ``_validated`` flag to True.
    """
    self._validated = True