Skip to content

V1 0 0

v1_0_0Hasher

Query hasher class for the v1.0.0 query schema.

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
class v1_0_0Hasher:
    """
    Produces stable hashes for queries written against the v1.0.0 query schema.

    Every ``create_<name>`` method renders the group or term called ``<name>`` as a
    string; ``hash_query`` feeds those strings into a SHA-1 digest.
    """

    def hash_query(self, query):
        """
        Hashes a v1.0.0 query in a stable way.

        Only the ``search`` and ``filters`` keys are read, and each one is only
        included in the hash when it is present in the query.

        :param query: the query dict
        :returns: the hex digest
        """
        digest = hashlib.sha1()
        if 'search' in query:
            search_part = f'search:{query["search"]}'
            digest.update(search_part.encode('utf-8'))
        if 'filters' in query:
            rendered = self.create_group_or_term(query['filters'])
            digest.update(f'filters:{rendered}'.encode('utf-8'))
        return digest.hexdigest()

    def create_group_or_term(self, group_or_term):
        """
        Renders a single group or term dict as a string.

        The dict's first key names the group/term type and selects the matching
        ``create_<type>`` method, which is then called with that key's value.

        :param group_or_term: a dict defining a single group or term
        :returns: a string representing the group or term
        """
        # only one property is allowed, so the first item is the only one we need
        kind, options = next(iter(group_or_term.items()))
        builder = getattr(self, f'create_{kind}')
        return builder(options)

    def create_and(self, group):
        """
        Renders the given group as an and query string.

        :param group: the members of the and group
        :returns: a string representing the group
        """
        members = [self.create_group_or_term(member) for member in group]
        # ordering the rendered members keeps the result independent of input order
        members.sort()
        return 'and:[' + '|'.join(members) + ']'

    def create_or(self, group):
        """
        Renders the given group as an or query string.

        :param group: the members of the or group
        :returns: a string representing the group
        """
        members = [self.create_group_or_term(member) for member in group]
        # ordering the rendered members keeps the result independent of input order
        members.sort()
        return 'or:[' + '|'.join(members) + ']'

    def create_not(self, group):
        """
        Renders the given group as a not query string.

        :param group: the members of the not group
        :returns: a string representing the group
        """
        members = [self.create_group_or_term(member) for member in group]
        # ordering the rendered members keeps the result independent of input order
        members.sort()
        return 'not:[' + '|'.join(members) + ']'

    @staticmethod
    def create_string_equals(options):
        """
        Renders the options of a string_equals term as a string.

        :param options: the options for the string_equals query, read via the
                        ``fields`` and ``value`` keys
        :returns: a string representing the term
        """
        # the field names are sorted so that their input order doesn't matter
        ordered_fields = sorted(options['fields'])
        return 'string_equals:{};{}'.format(','.join(ordered_fields), options['value'])

    @staticmethod
    def create_string_contains(options):
        """
        Renders the options of a string_contains term as a string.

        :param options: the options for the string_contains query, read via the
                        ``fields`` and ``value`` keys
        :returns: a string representing the term
        """
        # the field names are sorted so that their input order doesn't matter
        ordered_fields = sorted(options['fields'])
        return 'string_contains:{};{}'.format(
            ','.join(ordered_fields), options['value']
        )

    @staticmethod
    def create_number_equals(options):
        """
        Renders the options of a number_equals term as a string.

        :param options: the options for the number_equals query, read via the
                        ``fields`` and ``value`` keys
        :returns: a string representing the term
        """
        # the field names are sorted so that their input order doesn't matter
        ordered_fields = sorted(options['fields'])
        return 'number_equals:{};{}'.format(','.join(ordered_fields), options['value'])

    @staticmethod
    def create_number_range(options):
        """
        Renders the options of a number_range term as a string.

        The upper bound (``less_than``) is written before the lower bound
        (``greater_than``); a bound that is missing or None is left out. Both bounds
        are treated as inclusive unless the matching ``*_inclusive`` option is falsy.

        :param options: the options for the number_range query
        :returns: a string representing the term
        """
        # the field names are sorted so that their input order doesn't matter
        pieces = [f'number_range:{",".join(sorted(options["fields"]))};']

        upper = options.get('less_than')
        if upper is not None:
            pieces.append('<=' if options.get('less_than_inclusive', True) else '<')
            pieces.append(str(upper))

        lower = options.get('greater_than')
        if lower is not None:
            pieces.append('>=' if options.get('greater_than_inclusive', True) else '>')
            pieces.append(str(lower))

        return ''.join(pieces)

    @staticmethod
    def create_exists(options):
        """
        Renders the options of an exists term as a string.

        :param options: the options for the exists query; a truthy ``geo_field``
                        takes precedence over ``fields``
        :returns: a string representing the term
        """
        if not options.get('geo_field', False):
            # the field names are sorted so that their input order doesn't matter
            return 'exists:' + ','.join(sorted(options['fields']))
        return 'geo_exists'

    @staticmethod
    def create_geo_point(options):
        """
        Renders the options of a geo_point term as a string.

        :param options: the options for the geo_point query; ``radius`` defaults to 0
                        and ``radius_unit`` to ``m``
        :returns: a string representing the term
        """
        radius = options.get('radius', 0)
        unit = options.get('radius_unit', 'm')
        return 'geo_point:{}{};{};{}'.format(
            radius, unit, options['latitude'], options['longitude']
        )

    @staticmethod
    def create_geo_named_area(options):
        """
        Renders the options of a geo_named_area term as a string.

        Only the first key/value pair of the options is used.

        :param options: the options for the geo_named_area query
        :returns: a string representing the term
        """
        category, name = next(iter(options.items()))
        return f'geo_named_area:{category};{name}'

    def create_geo_custom_area(self, coordinates):
        """
        Renders the coordinates of a geo_custom_area term as a string.

        :param coordinates: the coordinates for the geo_custom_area query, as a list
                            of GeoJSON Polygons
        :returns: a string representing the term
        """
        rendered = []
        for polygon in coordinates:
            # a Polygon holds at least one element: the first is the outer boundary
            # of the shape and any further elements are holes cut out of it
            boundary, holes = polygon[0], polygon[1:]
            boundary_part = self.build_geo_polygon_query(boundary)

            if not holes:
                rendered.append(boundary_part)
                continue

            hole_parts = [self.build_geo_polygon_query(hole) for hole in holes]
            # the holes are ordered so that their input order doesn't matter
            hole_parts.sort()
            # NOTE: hole_parts is a list, so it is written out using its list
            # representation; existing hashes depend on this exact text
            rendered.append(f'{boundary_part}/{hole_parts}')

        return 'geo_custom_area:' + ';'.join(rendered)

    @staticmethod
    def build_geo_polygon_query(points):
        """
        Renders a series of points as a string, keeping them in the order given as
        reordering them could change the meaning.

        Each point is written with its second element first, i.e. as
        ``[point[1],point[0]]``.

        :param points: the points, each a pair of values
        :returns: a string representing the points
        """
        pairs = ['[{},{}]'.format(point[1], point[0]) for point in points]
        return ','.join(pairs)

build_geo_polygon_query(points) staticmethod

Given a series of points, returns a string version of them. Note that we don't sort them as that could change the meaning.

Parameters:

Name Type Description Default
points

the points as lat lon pairs

required

Returns:

Type Description

a string representing the points

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
664
665
666
667
668
669
670
671
672
673
@staticmethod
def build_geo_polygon_query(points):
    """
    Given a series of points, returns a string version of them. Note that we don't
    sort them as that could change the meaning.

    :param points: the points as lat lon pairs
    :returns: a string representing the points
    """
    return ','.join(f'[{point[1]},{point[0]}]' for point in points)

create_and(group)

Creates and returns a string version of the given group as an and query.

Parameters:

Name Type Description Default
group

the group to build the and from

required

Returns:

Type Description

a string representing the group

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
494
495
496
497
498
499
500
501
502
503
def create_and(self, group):
    """
    Renders the given group as an and query string.

    :param group: the members of the and group
    :returns: a string representing the group
    """
    members = [self.create_group_or_term(member) for member in group]
    # ordering the rendered members keeps the result independent of input order
    members.sort()
    return 'and:[' + '|'.join(members) + ']'

create_exists(options) staticmethod

Given the options for an exists term, creates and returns a string version of it.

Parameters:

Name Type Description Default
options

the options for the exists query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
597
598
599
600
601
602
603
604
605
606
607
608
609
610
@staticmethod
def create_exists(options):
    """
    Given the options for a exists term, creates and returns a string version of it.

    :param options: the options for the exists query
    :returns: a string representing the term
    """
    if options.get('geo_field', False):
        return 'geo_exists'
    else:
        # sorting the fields makes this stable
        fields = ','.join(sorted(options['fields']))
        return f'exists:{fields}'

create_geo_custom_area(coordinates)

Given the coordinates for a geo_custom_area term, creates and returns a string version of it.

Parameters:

Name Type Description Default
coordinates

the coordinates for the geo_custom_area query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
def create_geo_custom_area(self, coordinates):
    """
    Renders the coordinates of a geo_custom_area term as a string.

    :param coordinates: the coordinates for the geo_custom_area query, as a list of
                        GeoJSON Polygons
    :returns: a string representing the term
    """
    rendered = []
    for polygon in coordinates:
        # a Polygon holds at least one element: the first is the outer boundary of
        # the shape and any further elements are holes cut out of it
        boundary, holes = polygon[0], polygon[1:]
        boundary_part = self.build_geo_polygon_query(boundary)

        if not holes:
            rendered.append(boundary_part)
            continue

        hole_parts = [self.build_geo_polygon_query(hole) for hole in holes]
        # the holes are ordered so that their input order doesn't matter
        hole_parts.sort()
        # NOTE: hole_parts is a list, so it is written out using its list
        # representation; existing hashes depend on this exact text
        rendered.append(f'{boundary_part}/{hole_parts}')

    return 'geo_custom_area:' + ';'.join(rendered)

create_geo_named_area(options) staticmethod

Given the options for a geo_named_area term, creates and returns a string version of it.

Parameters:

Name Type Description Default
options

the options for the geo_named_area query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
624
625
626
627
628
629
630
631
632
633
@staticmethod
def create_geo_named_area(options):
    """
    Given the options for a geo_named_area term, creates and returns a string
    version of it.

    :param options: the options for the geo_named_area query
    :returns: a string representing the term
    """
    return 'geo_named_area:{};{}'.format(*next(iter(options.items())))

create_geo_point(options) staticmethod

Given the options for a geo_point term, creates and returns a string version of it.

Parameters:

Name Type Description Default
options

the options for the geo_point query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
612
613
614
615
616
617
618
619
620
621
622
@staticmethod
def create_geo_point(options):
    """
    Given the options for a geo_point term, creates and returns a string version of
    it.

    :param options: the options for the geo_point query
    :returns: a string representing the term
    """
    distance = f'{options.get("radius", 0)}{options.get("radius_unit", "m")}'
    return f'geo_point:{distance};{options["latitude"]};{options["longitude"]}'

create_group_or_term(group_or_term)

Creates and returns a string version of the given group or term dict.

Parameters:

Name Type Description Default
group_or_term

a dict defining a single group or term

required

Returns:

Type Description

a string representing the group or term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
481
482
483
484
485
486
487
488
489
490
491
492
def create_group_or_term(self, group_or_term):
    """
    Renders a single group or term dict as a string.

    The dict's first key names the group/term type and selects the matching
    ``create_<type>`` method, which is then called with that key's value.

    :param group_or_term: a dict defining a single group or term
    :returns: a string representing the group or term
    """
    # only one property is allowed, so the first item is the only one we need
    kind, options = next(iter(group_or_term.items()))
    builder = getattr(self, f'create_{kind}')
    return builder(options)

create_not(group)

Creates and returns a string version of the given group as a not query.

Parameters:

Name Type Description Default
group

the group to build the not from

required

Returns:

Type Description

a string representing the group

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
516
517
518
519
520
521
522
523
524
525
def create_not(self, group):
    """
    Renders the given group as a not query string.

    :param group: the members of the not group
    :returns: a string representing the group
    """
    members = [self.create_group_or_term(member) for member in group]
    # ordering the rendered members keeps the result independent of input order
    members.sort()
    return 'not:[' + '|'.join(members) + ']'

create_number_equals(options) staticmethod

Given the options for a number_equals term, creates and returns a string version of it.

Parameters:

Name Type Description Default
options

the options for the number_equals query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
553
554
555
556
557
558
559
560
561
562
563
564
@staticmethod
def create_number_equals(options):
    """
    Given the options for a number_equals term, creates and returns a string version
    of it.

    :param options: the options for the number_equals query
    :returns: a string representing the term
    """
    # sorting the fields makes this stable
    fields = ','.join(sorted(options['fields']))
    return f'number_equals:{fields};{options["value"]}'

create_number_range(options) staticmethod

Given the options for a number_range term, creates and returns a string version of it.

Parameters:

Name Type Description Default
options

the options for the number_range query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
@staticmethod
def create_number_range(options):
    """
    Given the options for a number_range term, creates and returns a string version
    of it.

    :param options: the options for the number_range query
    :returns: a string representing the term
    """
    # sorting the fields makes this stable
    fields = ','.join(sorted(options['fields']))
    hash_value = f'number_range:{fields};'

    less_than = options.get('less_than', None)
    less_than_inclusive = options.get('less_than_inclusive', True)
    if less_than is not None:
        hash_value += '<'
        if less_than_inclusive:
            hash_value += '='
        hash_value += str(less_than)

    greater_than = options.get('greater_than', None)
    greater_than_inclusive = options.get('greater_than_inclusive', True)
    if greater_than is not None:
        hash_value += '>'
        if greater_than_inclusive:
            hash_value += '='
        hash_value += str(greater_than)

    return hash_value

create_or(group)

Creates and returns a string version of the given group as an or query.

Parameters:

Name Type Description Default
group

the group to build the or from

required

Returns:

Type Description

a string representing the group

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
505
506
507
508
509
510
511
512
513
514
def create_or(self, group):
    """
    Renders the given group as an or query string.

    :param group: the members of the or group
    :returns: a string representing the group
    """
    members = [self.create_group_or_term(member) for member in group]
    # ordering the rendered members keeps the result independent of input order
    members.sort()
    return 'or:[' + '|'.join(members) + ']'

create_string_contains(options) staticmethod

Given the options for a string_contains term, creates and returns a string version of it.

Parameters:

Name Type Description Default
options

the options for the string_contains query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
540
541
542
543
544
545
546
547
548
549
550
551
@staticmethod
def create_string_contains(options):
    """
    Given the options for a string_contains term, creates and returns a string
    version of it.

    :param options: the options for the string_contains query
    :returns: a string representing the term
    """
    # sorting the fields makes this stable
    fields = ','.join(sorted(options['fields']))
    return f'string_contains:{fields};{options["value"]}'

create_string_equals(options) staticmethod

Given the options for a string_equals term, creates and returns a string version of it.

Parameters:

Name Type Description Default
options

the options for the string_equals query

required

Returns:

Type Description

a string representing the term

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
527
528
529
530
531
532
533
534
535
536
537
538
@staticmethod
def create_string_equals(options):
    """
    Given the options for a string_equals term, creates and returns a string version
    of it.

    :param options: the options for the string_equals query
    :returns: a string representing the term
    """
    # sorting the fields makes this stable
    fields = ','.join(sorted(options['fields']))
    return f'string_equals:{fields};{options["value"]}'

hash_query(query)

Stable hash function for v1.0.0 queries.

Parameters:

Name Type Description Default
query

the query dict

required

Returns:

Type Description

the hex digest

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
def hash_query(self, query):
    """
    Hashes a v1.0.0 query in a stable way.

    Only the ``search`` and ``filters`` keys are read, and each one is only included
    in the hash when it is present in the query.

    :param query: the query dict
    :returns: the hex digest
    """
    digest = hashlib.sha1()
    if 'search' in query:
        search_part = f'search:{query["search"]}'
        digest.update(search_part.encode('utf-8'))
    if 'filters' in query:
        rendered = self.create_group_or_term(query['filters'])
        digest.update(f'filters:{rendered}'.encode('utf-8'))
    return digest.hexdigest()

v1_0_0Schema

Bases: Schema

Schema class for the v1.0.0 query schema.

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
class v1_0_0Schema(Schema):
    """
    Schema class for the v1.0.0 query schema.

    Provides validation, normalisation and hashing of v1.0.0 query dicts and
    translates them into elasticsearch-dsl query objects. Each group or term type is
    handled by the ``create_<type>`` method of the same name, which is looked up
    dynamically by ``create_group_or_term``.
    """

    version = 'v1.0.0'

    def __init__(self):
        self.schema, self.validator = load_core_schema(v1_0_0Schema.version)
        # name -> MultiPolygon coordinate lookups, one per geo_named_area category
        self.geojson = {
            'country': self.load_geojson(
                '50m-admin-0-countries-v4.1.0.geojson', ('NAME_EN', 'NAME')
            ),
            # if we use name_en we end up with one atlantic ocean whereas if we use name
            # we get 2 - the "North Atlantic Ocean" and the "South Atlantic Ocean". I
            # think this is preferable.
            'marine': self.load_geojson('50m-marine-regions-v4.1.0.geojson', ('name',)),
            'geography': self.load_geojson(
                '50m-geography-regions-v4.1.0.geojson', ('name_en', 'name')
            ),
        }
        self.hasher = v1_0_0Hasher()

    def validate(self, query: dict):
        """
        Validates the query against the v1.0.0 schema by delegating to the validator
        loaded in the constructor.

        :param query: the query to validate
        """
        self.validator.validate(query)

    def hash(self, query: dict) -> str:
        """
        Hashes the given query and returns the hex digest of it.

        :param query: the query dict
        :returns: the hex digest of the hash of the query
        """
        return self.hasher.hash_query(query)

    def normalise(self, query):
        """
        Corrects some (small) common query errors, e.g. removing empty groups.

        :param query: the query dict
        :returns: the corrected/normalised query dict
        """
        query = convert_small_or_groups(query)
        query = remove_empty_groups(query)
        return query

    def translate(self, query: dict) -> Query:
        """
        Translates the query into an elasticsearch-dsl query object.

        The free text ``search`` (if any) and the ``filters`` (if any) are combined:

            - neither present: ``EMPTY_QUERY`` is returned
            - only one present: that query is returned on its own
            - both present: they are combined, using ``&`` if the filters are already
              a Bool, or by wrapping both in a new Bool otherwise

        :param query: the whole query dict
        :returns: an instantiated elasticsearch-dsl object
        """
        if 'search' in query:
            search = match_query(query['search'], operator='and')
        else:
            search = None

        filters = self.get_filters(query)

        if search is None:
            return EMPTY_QUERY if filters is None else filters
        if filters is None:
            return search
        if isinstance(filters, Bool):
            return filters & search
        return Bool(filter=[filters], must=[search])

    def get_filters(self, query: dict) -> Optional[Query]:
        """
        Builds the elasticsearch-dsl query for the ``filters`` part of the given query.
        If no filters are defined in the query then None is returned.

        :param query: the whole query dict
        :returns: an elasticsearch-dsl Query object or None
        """
        if 'filters' in query:
            return self.create_group_or_term(query['filters'])
        return None

    def create_group_or_term(self, group_or_term):
        """
        Creates and returns the elasticsearch-dsl query object for the given group or
        term dict by dispatching to the ``create_<type>`` method named after the dict's
        first key.

        :param group_or_term: a dict defining a single group or term
        :returns: an elasticsearch-dsl Query object
        """
        # only one property is allowed so we can safely just extract the only name and
        # options
        group_or_term_type, group_or_term_options = next(iter(group_or_term.items()))
        return getattr(self, f'create_{group_or_term_type}')(group_or_term_options)

    def create_and(self, group):
        """
        Creates and returns an elasticsearch-dsl query object representing the given
        group as an and query. This will be a Bool with the members in its filter
        clause for groups that don't have exactly 1 member, or will just be the actual
        member if only 1 member is found in the group. Unwrapping the single member is
        strictly unnecessary as elasticsearch/Lucene itself will normalise the query
        and remove redundant nestings, but we might as well do it here seeing as we
        can, and it makes smaller elasticsearch queries.

        :param group: the group to build the and from
        :returns: the first member from the group if there's only one member in the
            group, or a Bool
        """
        members = [self.create_group_or_term(member) for member in group]
        return members[0] if len(members) == 1 else Bool(filter=members)

    def create_or(self, group):
        """
        Creates and returns an elasticsearch-dsl query object representing the given
        group as an or query. This will be a Bool with a should in it for groups that
        don't have exactly 1 member, or will just be the actual member if only 1 member
        is found in the group (see build_or).

        :param group: the group to build the or from
        :returns: the first member from the group if there's only one member in the
            group, or a Bool
        """
        return self.build_or([self.create_group_or_term(member) for member in group])

    def create_not(self, group):
        """
        Creates and returns an elasticsearch-dsl query object representing the given
        group as a not query. This will always be a Bool with every member of the group
        in its must_not clause.

        :param group: the group to build the not from
        :returns: a Bool query
        """
        return Bool(must_not=[self.create_group_or_term(member) for member in group])

    def create_string_equals(self, options):
        """
        Given the options for a string_equals term, creates and returns an
        elasticsearch-dsl object to represent it. One elasticsearch term query is built
        per requested field (the field name is passed through the keyword helper). If
        only one field is present in the fields property then the term query is returned
        directly, otherwise an or query is returned across all the fields requested.

        :param options: the options for the string_equals query
        :returns: an elasticsearch-dsl Query object or a Bool object
        """
        return self.build_or(
            [
                Q('term', **{keyword(field): options['value']})
                for field in options['fields']
            ]
        )

    def create_string_contains(self, options):
        """
        Given the options for a string_contains term, creates and returns an
        elasticsearch-dsl object to represent it. One elasticsearch match query (with
        the "and" operator) is built per requested field (the field name is passed
        through the text helper). If only one field is present in the fields property
        then the match query is returned directly, otherwise an or query is returned
        across all the fields requested. If the fields property is empty, a single
        match query against ALL_TEXT is returned instead.

        :param options: the options for the string_contains query
        :returns: an elasticsearch-dsl Query object or a Bool object
        """
        fields = options['fields']
        query = {'query': options['value'], 'operator': 'and'}

        if fields:
            return self.build_or(
                [Q('match', **{text(field): query}) for field in fields]
            )
        else:
            return Q('match', **{ALL_TEXT: query})

    def create_number_equals(self, options):
        """
        Given the options for a number_equals term, creates and returns an
        elasticsearch-dsl object to represent it. One elasticsearch term query is built
        per requested field (the field name is passed through the number helper). If
        only one field is present in the fields property then the term query is returned
        directly, otherwise an or query is returned across all the fields requested.

        :param options: the options for the number_equals query
        :returns: an elasticsearch-dsl Query object or a Bool object
        """
        return self.build_or(
            [
                Q('term', **{number(field): options['value']})
                for field in options['fields']
            ]
        )

    def create_number_range(self, options):
        """
        Given the options for a number_range term, creates and returns an
        elasticsearch-dsl object to represent it. One elasticsearch range query is built
        per requested field (the field name is passed through the number helper). If
        only one field is present in the fields property then the range query is
        returned directly, otherwise an or query is returned across all the fields
        requested.

        Both bounds are optional and both are inclusive unless the matching
        ``*_inclusive`` option is set to a falsey value.

        :param options: the options for the number_range query
        :returns: an elasticsearch-dsl Query object or a Bool object
        """
        less_than = options.get('less_than', None)
        greater_than = options.get('greater_than', None)
        less_than_inclusive = options.get('less_than_inclusive', True)
        greater_than_inclusive = options.get('greater_than_inclusive', True)
        query = {}
        if less_than is not None:
            query['lt' if not less_than_inclusive else 'lte'] = less_than
        if greater_than is not None:
            query['gt' if not greater_than_inclusive else 'gte'] = greater_than

        return self.build_or(
            [Q('range', **{number(field): query}) for field in options['fields']]
        )

    def create_exists(self, options):
        """
        Given the options for an exists term, creates and returns an elasticsearch-dsl
        object to represent it. If the geo_field option is truthy, a single exists
        query on ALL_POINTS is returned and the fields property is not read. Otherwise
        one query is built per requested field using exists_query; if only one field is
        present in the fields property then that query is returned directly, otherwise
        an or query is returned across all the fields requested.

        :param options: the options for the exists query
        :returns: an elasticsearch-dsl Query object or a Bool object
        """
        # TODO: should we provide exists on subfields?
        if options.get('geo_field', False):
            return Q('exists', field=ALL_POINTS)
        else:
            return self.build_or([exists_query(field) for field in options['fields']])

    def create_geo_point(self, options):
        """
        Given the options for a geo_point term, creates and returns an
        elasticsearch-dsl object to represent it. This term maps directly to a single
        elasticsearch geo_distance query against ALL_POINTS, centred on the given
        latitude and longitude. The radius defaults to 0 and the radius unit to "m".

        :param options: the options for the geo_point query
        :returns: an elasticsearch-dsl Query object
        """
        radius = options.get('radius', 0)
        unit = options.get('radius_unit', 'm')
        return Q(
            'geo_distance',
            **{
                'distance': f'{radius}{unit}',
                ALL_POINTS: {
                    'lat': options['latitude'],
                    'lon': options['longitude'],
                },
            },
        )

    def create_geo_named_area(self, options):
        """
        Given the options for a geo_named_area term, creates and returns an
        elasticsearch-dsl object to represent it. This term maps directly to one or more
        elasticsearch geo_polygon queries, if necessary combined using ands, ors and
        nots to provide MultiPolygon hole support.

        In v1.0.0, Natural Earth Data datasets are used to provide the lists of names
        and corresponding geojson areas. The 1:50million scale is used in an attempt to
        provide a good level of detail without destroying Elasticsearch with enormous
        numbers of points. See the `theme/public/querySchemas/geojson/` directory for
        source data and readme, and also the load_geojson function in this class.

        :param options: the options for the geo_named_area query, a dict whose first
            item is category -> area name
        :returns: an elasticsearch-dsl Query object (a single geo_polygon Query or a
            Bool Query)
        """
        category, name = next(iter(options.items()))
        # the per-category lookups are defaultdicts (see load_geojson) so indexing an
        # unknown name with [] would permanently insert an empty entry into the shared
        # lookup. Using get produces the same (empty) MultiPolygon for unknown names
        # without mutating the lookup.
        return self.build_multipolygon_query(self.geojson[category].get(name, []))

    def create_geo_custom_area(self, coordinates):
        """
        Given the coordinates for a geo_custom_area term, creates and returns an
        elasticsearch-dsl object to represent it. This term takes the equivalent of the
        coordinates array from a MultiPolygon type feature in GeoJSON and uses it to
        build a query which captures records that fall in the polygon (and outside any
        holes defined in the Polygon).

        :param coordinates: a MultiPolygon coordinates list
        :returns: an elasticsearch-dsl Query object (a single geo_polygon Query or a
            Bool Query)
        """
        return self.build_multipolygon_query(coordinates)

    @staticmethod
    def build_or(terms):
        """
        Utility function which when given a list of elasticsearch-dsl query objects,
        either returns the first one on its own (if there is exactly one) or creates an
        "or" query encapsulating them. Note that an empty list also produces a Bool,
        one with no should clauses.

        :param terms: a list of elasticsearch-dsl terms
        :returns: either a Query object or a Bool should object
        """
        return (
            terms[0] if len(terms) == 1 else Bool(should=terms, minimum_should_match=1)
        )

    @staticmethod
    def build_geo_polygon_query(points):
        """
        Given a list of points (where each point is a list with 2 elements, the
        longitude and the latitude (note the order, it's the same as the GeoJSON spec)),
        creates a geo_polygon elasticsearch-dsl query object for the points against
        ALL_POINTS and returns it.

        :param points: a list of points
        :returns: an elasticsearch-dsl query object
        """
        return Q(
            'geo_polygon',
            **{
                ALL_POINTS: {
                    'points': [{'lat': point[1], 'lon': point[0]} for point in points]
                }
            },
        )

    @staticmethod
    def build_multipolygon_query(coordinates):
        """
        Utility function for building elasticsearch-dsl queries that represent GeoJSON
        MultiPolygons. Given the coordinates this function creates geo_polygon queries
        and Bool queries to represent the various enclosures and holes in those
        enclosures to find all records residing in the MultiPolygon. The coordinates
        parameter should match the format required by GeoJSON and therefore be a series
        of nested lists, see the GeoJSON docs for details.

        :param coordinates: the coordinate list, which is basically a list of Polygons.
            See the GeoJSON doc for the exact format and meaning
        :returns: an elasticsearch-dsl object representing the MultiPolygon
        """
        queries = []
        # the first list is a list of GeoJSON Polygons
        for polygon in coordinates:
            # then the Polygon is a list containing at least one element. The first
            # element is the outer boundary shape of the polygon and any other elements
            # are holes in this shape
            outer, holes = polygon[0], polygon[1:]
            outer_query = v1_0_0Schema.build_geo_polygon_query(outer)

            if holes:
                holes_queries = [
                    v1_0_0Schema.build_geo_polygon_query(hole) for hole in holes
                ]
                # create a query which filters the outer query but filters out the holes
                queries.append(Bool(filter=[outer_query], must_not=holes_queries))
            else:
                queries.append(outer_query)

        return v1_0_0Schema.build_or(queries)

    @staticmethod
    def load_geojson(filename, name_keys):
        """
        Load the given geojson file, build a lookup using the data and the name_keys
        parameter and return it.

        The geojson file is assumed to be a list of features containing only Polygon or
        MultiPolygon types.

        The name_keys parameter should be a sequence of keys to use to retrieve a name
        for the feature from the properties dict. The first key found in the properties
        dict with a value is used and therefore the keys listed should be in priority
        order. The extracted name is passed to string.capwords to produce a sensible and
        consistent set of names.

        The returned lookup is a defaultdict(list), so indexing it with an unknown name
        inserts an empty entry; use ``get`` or ``in`` for lookups that must not modify
        it.

        :param filename: the name geojson file to load from the given path
        :param name_keys: a priority ordered sequence of keys to use for feature name
            retrieval
        :returns: a dict of names -> MultiPolygons
        :raises ValueError: if a feature has no value under any of the name keys
        """
        path = schema_base_path.joinpath(v1_0_0Schema.version).joinpath('geojson')

        # make sure we read the file using utf-8
        with io.open(path.joinpath(filename), 'r', encoding='utf-8') as f:
            features = json.load(f)['features']

        lookup = defaultdict(list)
        for feature in features:
            properties = feature['properties']
            # find the first name key with a value
            raw_name = next(
                filter(None, (properties.get(key, None) for key in name_keys)), None
            )
            if raw_name is None:
                # fail with a useful message rather than leaking a bare StopIteration
                raise ValueError(
                    f'Feature in {filename} has no value for any of the name keys '
                    f'{tuple(name_keys)}'
                )
            name = string.capwords(raw_name)

            coordinates = feature['geometry']['coordinates']
            # if the feature is a Polygon, wrap it in a list to make it a
            # MultiPolygon
            if feature['geometry']['type'] == 'Polygon':
                coordinates = [coordinates]

            # add the polygons found to the existing MultiPolygon (some names are
            # listed multiple times in the source geojson files and require
            # stitching together to make a single name -> MultiPolygon mapping
            polygons = lookup[name]
            for polygon in coordinates:
                # if a polygon is already represented in the MultiPolygon, ignore
                # the dupe
                if polygon not in polygons:
                    polygons.append(polygon)

        return lookup

build_geo_polygon_query(points) staticmethod

Given a list of points (where each point is a list with 2 elements, the longitude and the latitude (note the order, it's the same as the GeoJSON spec)), creates a geo_polygon elasticsearch-dsl query object for the points and returns it.

Parameters:

Name Type Description Default
points

a list of points

required

Returns:

Type Description

an elasticsearch-dsl query object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
@staticmethod
def build_geo_polygon_query(points):
    """
    Build a geo_polygon elasticsearch-dsl query from a ring of points.

    Each point is indexed GeoJSON style, i.e. longitude at index 0 and latitude at
    index 1, and is converted to a lat/lon dict. The resulting query is keyed on
    ALL_POINTS.

    :param points: a list of points
    :returns: an elasticsearch-dsl query object
    """
    # GeoJSON order is [lon, lat] so the indexes are flipped when building the dicts
    vertices = [{'lat': pair[1], 'lon': pair[0]} for pair in points]
    polygon = {ALL_POINTS: {'points': vertices}}
    return Q('geo_polygon', **polygon)

build_multipolygon_query(coordinates) staticmethod

Utility function for building elasticsearch-dsl queries that represent GeoJSON MultiPolygons. Given the coordinates this function creates geo_polygon queries and Bool queries to represent the various enclosures and holes in those enclosures to find all records residing in the MultiPolygon. The coordinates parameter should match the format required by GeoJSON and therefore be a series of nested lists, see the GeoJSON docs for details.

Parameters:

Name Type Description Default
coordinates

the coordinate list, which is basically a list of Polygons. See the GeoJSON doc for the exact format and meaning

required

Returns:

Type Description

an elasticsearch-dsl object representing the MultiPolygon

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
@staticmethod
def build_multipolygon_query(coordinates):
    """
    Build the elasticsearch-dsl query matching records inside a GeoJSON MultiPolygon.

    Every Polygon in the coordinates becomes either a plain geo_polygon query (no
    holes) or a Bool which filters on the outer ring and excludes each hole via
    must_not. The per-Polygon queries are then combined using build_or.

    :param coordinates: the coordinate list, which is basically a list of Polygons.
        See the GeoJSON doc for the exact format and meaning
    :returns: an elasticsearch-dsl object representing the MultiPolygon
    """
    make_ring_query = v1_0_0Schema.build_geo_polygon_query
    clauses = []
    for rings in coordinates:
        # ring 0 is the outer boundary, any further rings are holes cut out of it
        boundary_ring, hole_rings = rings[0], rings[1:]
        boundary = make_ring_query(boundary_ring)

        if not hole_rings:
            clauses.append(boundary)
            continue

        cutouts = [make_ring_query(ring) for ring in hole_rings]
        # inside the boundary but outside all of the holes
        clauses.append(Bool(filter=[boundary], must_not=cutouts))

    return v1_0_0Schema.build_or(clauses)

build_or(terms) staticmethod

Utility function which, when given a list of elasticsearch-dsl query objects, either returns the first one on its own or creates an "or" query encapsulating them.

Parameters:

Name Type Description Default
terms

a list of elasticsearch-dsl terms

required

Returns:

Type Description

either a Query object or a Bool should object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
332
333
334
335
336
337
338
339
340
341
342
343
344
@staticmethod
def build_or(terms):
    """
    Combine a list of elasticsearch-dsl query objects into an "or".

    A list holding exactly one term is unwrapped and that term is returned as is;
    any other list is wrapped in a Bool should query requiring at least one match.

    :param terms: a list of elasticsearch-dsl terms
    :returns: either a Query object or a Bool should object
    """
    if len(terms) == 1:
        # nothing to "or" together, skip the redundant wrapper
        return terms[0]
    return Bool(should=terms, minimum_should_match=1)

create_and(group)

Creates and returns an elasticsearch-dsl query object representing the given group as an and query. This will be a Bool with a filter in it for groups with more than 1 member, or will just be the actual member if only 1 member is found in the group. This is strictly unnecessary as elasticsearch/Lucene itself will normalise the query and remove redundant nestings, but we might as well do it here seeing as we can, and it makes smaller elasticsearch queries.

Parameters:

Name Type Description Default
group

the group to build the and from

required

Returns:

Type Description

the first member from the group if there's only one member in the group, or a Bool

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def create_and(self, group):
    """
    Build the elasticsearch-dsl query for an "and" group.

    Each member of the group is converted using create_group_or_term. When the group
    has exactly one member that member's query is returned unwrapped, otherwise the
    member queries are placed in the filter clause of a Bool. The unwrapping isn't
    required (elasticsearch/Lucene normalises redundant nesting itself) but it keeps
    the queries we send smaller.

    :param group: the group to build the and from
    :returns: the first member from the group if there's only one member in the
        group, or a Bool
    """
    built = []
    for member in group:
        built.append(self.create_group_or_term(member))

    if len(built) == 1:
        return built[0]
    return Bool(filter=built)

create_exists(options)

Given the options for an exists term, creates and returns an elasticsearch-dsl object to represent it. This term maps directly to an elasticsearch exists query. If only one field is present in the fields property then the term query is returned directly, otherwise an or query is returned across all the fields requested.

Parameters:

Name Type Description Default
options

the options for the exists query

required

Returns:

Type Description

an elasticsearch-dsl Query object or a Bool object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
def create_exists(self, options):
    """
    Build the elasticsearch-dsl query for an exists term.

    When the geo_field option is truthy a single exists query on ALL_POINTS is
    returned and the fields option is not read. Otherwise one query per requested
    field is created with exists_query and the results are combined using build_or
    (so a single field yields its query directly).

    :param options: the options for the exists query
    :returns: an elasticsearch-dsl Query object or a Bool object
    """
    # TODO: should we provide exists on subfields?
    if not options.get('geo_field', False):
        per_field = [exists_query(name) for name in options['fields']]
        return self.build_or(per_field)
    return Q('exists', field=ALL_POINTS)

create_geo_custom_area(coordinates)

Given the coordinates for a geo_custom_area term, creates and returns an elasticsearch-dsl object to represent it. This term takes the equivalent of the coordinates array from a MultiPolygon type feature in GeoJSON and uses it to build a query which captures records that fall in the polygon (and outside any holes defined in the Polygon).

Parameters:

Name Type Description Default
coordinates

a MultiPolygon coordinates list

required

Returns:

Type Description

an elasticsearch-dsl Query object (a single geo_polygon Query or a Bool Query)

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
318
319
320
321
322
323
324
325
326
327
328
329
330
def create_geo_custom_area(self, coordinates):
    """
    Build the elasticsearch-dsl query for a geo_custom_area term.

    The term's value is the equivalent of the coordinates array of a GeoJSON
    MultiPolygon feature; it is handed to build_multipolygon_query unchanged, which
    produces a query matching records inside the polygons (and outside any holes
    they define).

    :param coordinates: a MultiPolygon coordinates list
    :returns: an elasticsearch-dsl Query object (a single geo_polygon Query or a
        Bool Query)
    """
    area_query = self.build_multipolygon_query(coordinates)
    return area_query

create_geo_named_area(options)

Given the options for a geo_named_area term, creates and returns an elasticsearch-dsl object to represent it. This term maps directly to one or more elasticsearch geo_polygon queries, if necessary combined using ands, ors and nots to provide MultiPolygon hole support.

In v1.0.0, Natural Earth Data datasets are used to provide the lists of names and corresponding geojson areas. The 1:50million scale is used in an attempt to provide a good level of detail without destroying Elasticsearch with enormous numbers of points. See the theme/public/querySchemas/geojson/ directory for source data and readme, and also the load_geojson function in this class.

Parameters:

Name Type Description Default
options

the options for the geo_named_area query

required

Returns:

Type Description

an elasticsearch-dsl Query object (a single geo_polygon Query or a Bool Query)

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
def create_geo_named_area(self, options):
    """
    Given the options for a geo_named_area term, creates and returns an
    elasticsearch-dsl object to represent it. This term maps directly to one or more
    elasticsearch geo_polygon queries, if necessary combined using ands, ors and
    nots to provide MultiPolygon hole support.

    In v1.0.0, Natural Earth Data datasets are used to provide the lists of names
    and corresponding geojson areas. The 1:50million scale is used in an attempt to
    provide a good level of detail without destroying Elasticsearch with enormous
    numbers of points. See the `theme/public/querySchemas/geojson/` directory for
    source data and readme, and also the load_geojson function in this class.

    :param options: the options for the geo_named_area query, a dict whose first
        item is category -> area name
    :returns: an elasticsearch-dsl Query object (a single geo_polygon Query or a
        Bool Query)
    """
    category, name = next(iter(options.items()))
    # the per-category lookups built by load_geojson are defaultdicts, so indexing an
    # unknown name with [] would permanently insert an empty entry into the shared
    # lookup. Using get yields the same (empty) MultiPolygon for unknown names
    # without mutating the lookup.
    polygons = self.geojson[category].get(name, [])
    return self.build_multipolygon_query(polygons)

create_geo_point(options)

Given the options for a geo_point term, creates and returns an elasticsearch-dsl object to represent it. This term maps directly to an elasticsearch geo_distance query. If only one field is present in the fields property then the term query is returned directly, otherwise an or query is returned across all the fields requested.

Parameters:

Name Type Description Default
options

the options for the geo_point query

required

Returns:

Type Description

an elasticsearch-dsl Query object or a Bool object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
def create_geo_point(self, options):
    """
    Build the elasticsearch-dsl query for a geo_point term.

    The result is a single geo_distance query against ALL_POINTS, centred on the
    latitude and longitude options. The distance is the radius option (default 0)
    immediately followed by the radius_unit option (default "m").

    :param options: the options for the geo_point query
    :returns: an elasticsearch-dsl Query object
    """
    distance = f'{options.get("radius", 0)}{options.get("radius_unit", "m")}'
    centre = {
        'lat': options['latitude'],
        'lon': options['longitude'],
    }
    return Q('geo_distance', **{'distance': distance, ALL_POINTS: centre})

create_group_or_term(group_or_term)

Creates and returns the elasticsearch-dsl query object necessary for the given group or term dict and returns it.

Parameters:

Name Type Description Default
group_or_term

a dict defining a single group or term

required

Returns:

Type Description

an elasticsearch-dsl Query object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
121
122
123
124
125
126
127
128
129
130
131
132
def create_group_or_term(self, group_or_term):
    """
    Dispatch a single group or term dict to the matching ``create_*`` method.

    The first (and, by the schema, only) key of the dict names the group or term
    type; its value is handed to the method called ``create_<type>`` on this
    object and that method's result is returned.

    :param group_or_term: a dict defining a single group or term
    :returns: an elasticsearch-dsl Query object
    """
    # the schema only permits one property per group/term, so the first item is
    # the whole definition
    first_item = next(iter(group_or_term.items()))
    kind, kind_options = first_item
    builder = getattr(self, f'create_{kind}')
    return builder(kind_options)

create_not(group)

Creates and returns an elasticsearch-dsl query object representing the given group as a not query. This will be a Bool with a must_not in it.

Parameters:

Name Type Description Default
group

the group to build the not from

required

Returns:

Type Description

a Bool query

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
165
166
167
168
169
170
171
172
173
def create_not(self, group):
    """
    Build a not query from the given group.

    Every member of the group is converted with ``create_group_or_term`` and the
    resulting queries are placed in the ``must_not`` clause of a Bool.

    :param group: the group to build the not from
    :returns: a Bool query
    """
    negated = list(map(self.create_group_or_term, group))
    return Bool(must_not=negated)

create_number_equals(options)

Given the options for a number_equals term, creates and returns an elasticsearch-dsl object to represent it. This term maps directly to an elasticsearch term query. If only one field is present in the fields property then the term query is returned directly, otherwise an or query is returned across all the fields requested.

Parameters:

Name Type Description Default
options

the options for the number_equals query

required

Returns:

Type Description

an elasticsearch-dsl Query object or a Bool object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def create_number_equals(self, options):
    """
    Build the elasticsearch-dsl query for a number_equals term.

    One elasticsearch term query is created per requested field, each matching
    the number version of the field against the given value. The per-field
    queries are then combined by ``build_or``.

    :param options: the options for the number_equals query (``fields`` and
        ``value``)
    :returns: whatever ``build_or`` returns for the per-field term queries (an
        elasticsearch-dsl Query object or a Bool object)
    """
    per_field = []
    for field in options['fields']:
        per_field.append(Q('term', **{number(field): options['value']}))
    return self.build_or(per_field)

create_number_range(options)

Given the options for a number_range term, creates and returns an elasticsearch-dsl object to represent it. This term maps directly to an elasticsearch range query. If only one field is present in the fields property then the range query is returned directly, otherwise an or query is returned across all the fields requested.

Parameters:

Name Type Description Default
options

the options for the number_range query

required

Returns:

Type Description

an elasticsearch-dsl Query object or a Bool object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def create_number_range(self, options):
    """
    Build the elasticsearch-dsl query for a number_range term.

    The ``less_than`` and ``greater_than`` options are both optional. Each bound
    that is present is added to an elasticsearch range query, inclusively
    (``lte``/``gte``) unless the matching ``*_inclusive`` option is falsey, in
    which case the exclusive form (``lt``/``gt``) is used. One range query is
    created per requested field and the results are combined by ``build_or``.

    :param options: the options for the number_range query
    :returns: whatever ``build_or`` returns for the per-field range queries (an
        elasticsearch-dsl Query object or a Bool object)
    """
    bounds = {}

    upper = options.get('less_than', None)
    upper_inclusive = options.get('less_than_inclusive', True)
    if upper is not None:
        bounds['lte' if upper_inclusive else 'lt'] = upper

    lower = options.get('greater_than', None)
    lower_inclusive = options.get('greater_than_inclusive', True)
    if lower is not None:
        bounds['gte' if lower_inclusive else 'gt'] = lower

    range_queries = [
        Q('range', **{number(field): bounds}) for field in options['fields']
    ]
    return self.build_or(range_queries)

create_or(group)

Creates and returns an elasticsearch-dsl query object representing the given group as an or query. This will be a Bool with a should in it for groups with more than 1 member, or will just be the actual member if only 1 member is found in the group. This is strictly unnecessary as Elasticsearch/Lucene itself will normalise the query and remove redundant nestings, but we might as well do it here seeing as we can, and it makes smaller elasticsearch queries.

Parameters:

Name Type Description Default
group

the group to build the or from

required

Returns:

Type Description

the first member from the group if there's only one member in the group, or a Bool

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def create_or(self, group):
    """
    Build an or query from the given group.

    Every member of the group is converted with ``create_group_or_term`` and the
    resulting list of queries is handed to ``build_or``, whose result is returned
    unchanged.

    :param group: the group to build the or from
    :returns: the result of ``build_or`` for the converted members
    """
    members = list(map(self.create_group_or_term, group))
    return self.build_or(members)

create_string_contains(options)

Given the options for a string_contains term, creates and returns an elasticsearch-dsl object to represent it. This term maps directly to an elasticsearch match query on the .full subfield. If only one field is present in the fields property then the term query is returned directly, otherwise an or query is returned across all the fields requested.

Parameters:

Name Type Description Default
options

the options for the string_contains query

required

Returns:

Type Description

an elasticsearch-dsl Query object or a Bool object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def create_string_contains(self, options):
    """
    Build the elasticsearch-dsl query for a string_contains term.

    The term becomes an elasticsearch match query using the ``and`` operator.
    When no fields are requested the match is run against ``ALL_TEXT``; otherwise
    one match query is created per field (on the text version of the field) and
    the results are combined by ``build_or``.

    :param options: the options for the string_contains query (``fields`` and
        ``value``)
    :returns: an elasticsearch-dsl Query object, or whatever ``build_or`` returns
        for the per-field match queries
    """
    fields = options['fields']
    match_options = {'query': options['value'], 'operator': 'and'}

    # an empty fields list means "search everything"
    if not fields:
        return Q('match', **{ALL_TEXT: match_options})

    per_field = [Q('match', **{text(field): match_options}) for field in fields]
    return self.build_or(per_field)

create_string_equals(options)

Given the options for a string_equals term, creates and returns an elasticsearch-dsl object to represent it. This term maps directly to an elasticsearch term query. If only one field is present in the fields property then the term query is returned directly, otherwise an or query is returned across all the fields requested.

Parameters:

Name Type Description Default
options

the options for the string_equals query

required

Returns:

Type Description

an elasticsearch-dsl Query object or a Bool object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def create_string_equals(self, options):
    """
    Build the elasticsearch-dsl query for a string_equals term.

    One elasticsearch term query is created per requested field, each matching
    the keyword version of the field against the given value. The per-field
    queries are then combined by ``build_or``.

    :param options: the options for the string_equals query (``fields`` and
        ``value``)
    :returns: whatever ``build_or`` returns for the per-field term queries (an
        elasticsearch-dsl Query object or a Bool object)
    """
    per_field = []
    for field in options['fields']:
        per_field.append(Q('term', **{keyword(field): options['value']}))
    return self.build_or(per_field)

get_filters(query)

Creates a boolean query from the query into the search object and then returns it. If no filters are defined in the query then None is returned.

Parameters:

Name Type Description Default
query dict

the whole query dict

required

Returns:

Type Description
Optional[Query]

a Bool object or None

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
109
110
111
112
113
114
115
116
117
118
119
def get_filters(self, query: dict) -> Optional[Query]:
    """
    Convert the ``filters`` part of the query into an elasticsearch-dsl query.

    The value under the ``filters`` key is passed to ``create_group_or_term`` and
    the result returned. Queries without a ``filters`` key produce None.

    :param query: the whole query dict
    :returns: the query object built from the filters, or None if the query has
        no filters
    """
    if 'filters' not in query:
        return None
    return self.create_group_or_term(query['filters'])

hash(query)

Hashes the given query and returns the hex digest of it.

Parameters:

Name Type Description Default
query dict

the query dict

required

Returns:

Type Description
str

the hex digest of the hash of the query

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
61
62
63
64
65
66
67
68
def hash(self, query: dict) -> str:
    """
    Hash the given query by delegating to this schema's hasher.

    :param query: the query dict
    :returns: the hex digest produced by ``self.hasher.hash_query``
    """
    digest = self.hasher.hash_query(query)
    return digest

load_geojson(filename, name_keys) staticmethod

Load the given geojson file, build a lookup using the data and the name_keys parameter and return it.

The geojson file is assumed to be a list of features containing only Polygon or MultiPolygon types.

The name_keys parameter should be a sequence of keys to use to retrieve a name for the feature from the properties dict. The first key found in the properties dict with a value is used and therefore the keys listed should be in priority order. The extracted name is passed to string.capwords to produce a sensible and consistent set of names.

Parameters:

Name Type Description Default
filename

the name of the geojson file to load from the schema's geojson directory

required
name_keys

a priority ordered sequence of keys to use for feature name retrieval

required

Returns:

Type Description

a dict of names -> MultiPolygons

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
@staticmethod
def load_geojson(filename, name_keys):
    """
    Read a geojson file from this schema's ``geojson`` directory and turn it into
    a lookup of feature names to polygons.

    The file is expected to hold a ``features`` list in which every geometry is
    either a Polygon or a MultiPolygon.

    Each feature's name is taken from its properties dict: the keys in name_keys
    are tried in order and the first one that yields a truthy value wins, so the
    keys should be listed in priority order. The chosen value is run through
    string.capwords so that names are consistently cased. If none of the keys
    yields a value, the StopIteration raised by the lookup propagates.

    Features that share a name are merged into a single list of polygons, with
    polygons that are already present for that name skipped.

    :param filename: the name of the geojson file to load from the schema's
        geojson directory
    :param name_keys: a priority ordered sequence of keys to use for feature name
        retrieval
    :returns: a dict of names -> MultiPolygons (lists of polygon coordinates)
    """
    geojson_dir = schema_base_path.joinpath(v1_0_0Schema.version).joinpath('geojson')

    # make sure we read the file using utf-8
    with io.open(geojson_dir.joinpath(filename), 'r', encoding='utf-8') as f:
        features = json.load(f)['features']

    lookup = defaultdict(list)
    for feature in features:
        # lazily pull candidate names in priority order and keep the first one
        # that has a value
        candidates = (feature['properties'].get(key, None) for key in name_keys)
        name = string.capwords(next(filter(None, candidates)))

        geometry = feature['geometry']
        polygons = geometry['coordinates']
        # a Polygon is treated as a MultiPolygon with a single member
        if geometry['type'] == 'Polygon':
            polygons = [polygons]

        # some names appear several times in the source files, so stitch their
        # polygons together under one name and drop exact duplicates
        for polygon in polygons:
            known = lookup[name]
            if polygon in known:
                continue
            known.append(polygon)

    return lookup

normalise(query)

Corrects some (small) common query errors, e.g. removing empty groups.

Parameters:

Name Type Description Default
query

the query dict

required

Returns:

Type Description

the corrected/normalised query dict

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
70
71
72
73
74
75
76
77
78
79
def normalise(self, query):
    """
    Tidy up the query by applying the schema's normalisation helpers.

    The query is first passed through ``convert_small_or_groups`` and the result
    is then passed through ``remove_empty_groups``.

    :param query: the query dict
    :returns: the corrected/normalised query dict
    """
    return remove_empty_groups(convert_small_or_groups(query))

translate(query)

Translates the query into an elasticsearch-dsl search object.

Parameters:

Name Type Description Default
query dict

the whole query dict

required

Returns:

Type Description
Query

an instantiated elasticsearch-dsl object

Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def translate(self, query: dict) -> Query:
    """
    Translate the whole query dict into an elasticsearch-dsl query object.

    The ``search`` part (if present) becomes a match query using the ``and``
    operator and the ``filters`` part (if present) is built by ``get_filters``.
    The two are then combined as follows:

        - neither present: ``EMPTY_QUERY``
        - only filters: the filters query
        - only search: the search query
        - both, filters is a Bool: ``filters & search``
        - both, filters is not a Bool: a Bool with the filters in ``filter`` and
          the search in ``must``

    :param query: the whole query dict
    :returns: an instantiated elasticsearch-dsl object
    """
    search = match_query(query['search'], operator='and') if 'search' in query else None
    filters = self.get_filters(query)

    if search is None:
        return EMPTY_QUERY if filters is None else filters
    if filters is None:
        return search
    if isinstance(filters, Bool):
        return filters & search
    return Bool(filter=[filters], must=[search])

validate(query)

Validates the query against the v1.0.0 schema.

Parameters:

Name Type Description Default
query dict

the query to validate

required
Source code in ckanext/versioned_datastore/lib/query/schemas/v1_0_0.py
53
54
55
56
57
58
59
def validate(self, query: dict):
    """
    Check the query against the v1.0.0 schema using this schema's validator.

    Nothing is returned; any error raised by the validator is left to propagate.

    :param query: the query to validate
    """
    validator = self.validator
    validator.validate(query)