Commit 40049809f48ce41c2a2aa69528fc315426da2429
1 parent
326e97bb
Exists in
master
and in
21 other branches
geo_shape support in index and query, without much sugar
Showing
5 changed files
with
319 additions
and
2 deletions
Show diff stats
README.md
... | ... | @@ -834,7 +834,9 @@ product = Product.first |
834 | 834 | product.similar(fields: ["name"], where: {size: "12 oz"}) |
835 | 835 | ``` |
836 | 836 | |
837 | -### Geospatial Searches | |
837 | +### Simple Geospatial Indexing: geo_points | |
838 | + | |
839 | +If your data consists of point values, searchkick offers a useful shorthand: | |
838 | 840 | |
839 | 841 | ```ruby |
840 | 842 | class City < ActiveRecord::Base |
... | ... | @@ -846,7 +848,9 @@ class City < ActiveRecord::Base |
846 | 848 | end |
847 | 849 | ``` |
848 | 850 | |
849 | -Reindex and search with: | |
851 | +Elasticsearch supports a range of useful search types for geo_point data: | |
852 | + | |
853 | +Within a radius | |
850 | 854 | |
851 | 855 | ```ruby |
852 | 856 | City.search "san", where: {location: {near: {lat: 37, lon: -114}, within: "100mi"}} # or 160km |
... | ... | @@ -878,6 +882,81 @@ Also supports [additional options](https://www.elastic.co/guide/en/elasticsearch |
878 | 882 | City.search "san", boost_by_distance: {field: :location, origin: {lat: 37, lon: -122}, function: :linear, scale: "30mi", decay: 0.5} |
879 | 883 | ``` |
880 | 884 | |
885 | + | |
886 | +### Complex Geospatial Indexing: geo_shapes | |
887 | + | |
888 | +You can also pass through complex or varied shapes as GeoJSON objects. | |
889 | + | |
890 | +```ruby | |
891 | +class City < ActiveRecord::Base | |
892 | + searchkick geo_shapes: { | |
893 | + bounds: {tree: "geohash", precision: "1km"}, | |
894 | + perimeter: {tree: "quadtree", precision: "10m"} | |
895 | + } | |
896 | + | |
897 | + def search_data | |
898 | + attributes.merge( | |
899 | + bounds: { | |
900 | + type: "envelope", | |
901 | + coordinates: [{lat: 4, lon: 1}, {lat: 2, lon: 3}] | |
902 | + }, | |
903 | + perimeter: { | |
904 | + type: "polygon", | |
905 | + coordinates: [[{lat: 1, lon: 2}, {lat: 3, lon: 4}, {lat: 5, lon: 6}, ...]] | |
906 | + } | |
907 | + ) | |
908 | + end | |
909 | +end | |
910 | +``` | |
911 | + | |
912 | +The `geo_shapes` hash is passed through to elasticsearch without modification. Please see the [geo_shape data type documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for options. | |
913 | + | |
914 | +Any geospatial data type can be used in the index or in the search. It is up to you to ensure that it is a valid geoJSON representation. The possible shapes are: | |
915 | + | |
916 | +* **point**: single lat/lon pair | |
917 | +* **multipoint**: array of points | |
918 | +* **linestring**: array of at least two lat/lon pairs | |
919 | +* **multilinestring**: array of lines | |
920 | +* **polygon**: an array of paths, each being an array of at least four lat/lon pairs whose first and last points are the same. Paths after the first represent exclusions. | |
921 | +* **multipolygon**: array of polygons | |
922 | +* **envelope**: a bounding box defined by top left and bottom right points | |
923 | +* **circle**: a bounding circle defined by center point and radius | |
924 | +* **geometrycollection**: an array of separate geoJSON objects possibly of various types | |
925 | + | |
926 | +See the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for details. GeoJSON coordinates are usually given as an array of `[lon, lat]` points, but this ordering is easy to get backwards, so searchkick also accepts objects with `lon` and `lat` keys. | |
927 | + | |
928 | +Elasticsearch is strict about geo_shape validity. For example, it will throw an exception if a polygon contains two consecutive identical points or is not properly closed. You probably want to validate the data before indexing. | |
929 | + | |
930 | + | |
931 | +### Geospatial searching | |
932 | + | |
933 | +Once a geo_shape index is established, you can apply a geo_shape filter to any search. This also takes a geoJSON shape, and will return a list of items based on their overlap with that shape. | |
934 | + | |
935 | +Intersecting with the query shape: | |
936 | + | |
937 | +```ruby | |
938 | +City.search "san", where: {bounds: {geo_shape: {type: "polygon", coordinates: [[{lat: 38, lon: -123}, ...]]}}} | |
939 | +``` | |
940 | + | |
941 | +Falling entirely within the query shape: | |
942 | + | |
943 | +```ruby | |
944 | +City.search "san", where: {bounds: {geo_shape: {type: "circle", relation: "within", coordinates: {lat: 38, lon: -123}, radius: "1km"}}} | |
945 | +``` | |
946 | + | |
947 | +Not touching the query shape: | |
948 | + | |
949 | +```ruby | |
950 | +City.search "san", where: {bounds: {geo_shape: {type: "envelope", relation: "disjoint", coordinates: [{lat: 38, lon: -123}, {lat: 37, lon: -122}]}}} | |
951 | +``` | |
952 | + | |
953 | +Envelope is a special case. For consistency, searchkick also understands coordinates given as top_left and bottom_right: | |
954 | + | |
955 | +```ruby | |
956 | +City.search "san", where: {bounds: {geo_shape: {type: "envelope", relation: "within", top_left: {lat: 38, lon: -123}, bottom_right: {lat: 37, lon: -122}}}} | |
957 | +``` | |
958 | + | |
959 | + | |
881 | 960 | ### Routing |
882 | 961 | |
883 | 962 | Searchkick supports [Elasticsearch’s routing feature](https://www.elastic.co/blog/customizing-your-document-routing). | ... | ... |
lib/searchkick/index_options.rb
... | ... | @@ -281,6 +281,11 @@ module Searchkick |
281 | 281 | } |
282 | 282 | end |
283 | 283 | |
284 | + options[:geo_shapes] = options[:geo_shapes].product([{}]).to_h if options[:geo_shapes].is_a? Array | |
285 | + (options[:geo_shapes] || {}).each do |field, shape_options| | |
286 | + mapping[field] = shape_options.merge(type: "geo_shape") | |
287 | + end | |
288 | + | |
284 | 289 | (options[:unsearchable] || []).map(&:to_s).each do |field| |
285 | 290 | mapping[field] = { |
286 | 291 | type: default_type, | ... | ... |
lib/searchkick/query.rb
... | ... | @@ -808,6 +808,25 @@ module Searchkick |
808 | 808 | field => op_value |
809 | 809 | } |
810 | 810 | } |
811 | + when :geo_shape | |
812 | + if op_value[:type] == "envelope" && op_value[:top_left].present? && op_value[:bottom_right].present? | |
813 | + op_value[:coordinates] = [coordinate_array(op_value[:top_left]), coordinate_array(op_value[:bottom_right])] | |
814 | + op_value.delete(:top_left) | |
815 | + op_value.delete(:bottom_right) | |
816 | + elsif op_value[:type] == "circle" | |
817 | + op_value[:coordinates] = coordinate_array(op_value[:coordinates] || []) | |
818 | + else | |
819 | + op_value[:coordinates] = (op_value[:coordinates] || []).map { |loc| coordinate_array(loc) } | |
820 | + end | |
821 | + relation = op_value.delete(:relation) || 'intersects' | |
822 | + filters << { | |
823 | + geo_shape: { | |
824 | + field => { | |
825 | + relation: relation, | |
826 | + shape: op_value | |
827 | + } | |
828 | + } | |
829 | + } | |
811 | 830 | when :top_left |
812 | 831 | filters << { |
813 | 832 | geo_bounding_box: { |
... | ... | @@ -923,6 +942,21 @@ module Searchkick |
923 | 942 | end |
924 | 943 | end |
925 | 944 | |
945 | + # Converts a GeoJSON-style coordinate value into nested arrays of [lon, lat] pairs. A Hash is read through its | |
946 | + # :lon and :lat keys and becomes [lon, lat]; an Array whose first element is not Numeric is mapped recursively; | |
947 | + # anything else (including an array of numbers) is returned unchanged and is NOT reversed. Open question: should | |
948 | + # [lat, lon] arrays be accepted and reversed, as happens elsewhere in searchkick? GeoJSON order is kept for now. | |
949 | + # | |
950 | + def coordinate_array(value) | |
951 | + if value.is_a?(Hash) | |
952 | + [value[:lon], value[:lat]] | |
953 | + elsif value.is_a?(Array) and !value[0].is_a?(Numeric) | |
954 | + value.map {|a| coordinate_array(a) } | |
955 | + else | |
956 | + value | |
957 | + end | |
958 | + end | |
959 | + | |
926 | 960 | def location_value(value) |
927 | 961 | if value.is_a?(Array) |
928 | 962 | value.map(&:to_f).reverse | ... | ... |
... | ... | @@ -0,0 +1,143 @@ |
1 | +require "pp" | |
2 | +require_relative "test_helper" | |
3 | + | |
4 | +class GeoShapeTest < Minitest::Test | |
5 | + | |
6 | + def test_geo_shape | |
7 | + regions = [ | |
8 | + {name: "Region A", text: "The witch had a cat", territory: "30,40,35,45,40,40,40,30,30,30,30,40"}, | |
9 | + {name: "Region B", text: "and a very tall hat", territory: "50,60,55,65,60,60,60,50,50,50,50,60"}, | |
10 | + {name: "Region C", text: "and long ginger hair which she wore in a plait.", territory: "10,20,15,25,20,20,20,10,10,10,10,20"}, | |
11 | + ] | |
12 | + store regions, Region | |
13 | + | |
14 | + # circle: centre given as a lat/lon hash, plus a radius | |
15 | + assert_search "*", ["Region A"], { | |
16 | + where: { | |
17 | + territory: { | |
18 | + geo_shape: { | |
19 | + type: "circle", | |
20 | + coordinates: {lat: 28.0, lon: 38.0}, | |
21 | + radius: "444000m" | |
22 | + } | |
23 | + } | |
24 | + } | |
25 | + }, Region | |
26 | + | |
27 | + # envelope: two corner points given as [lon, lat] arrays | |
28 | + assert_search "*", ["Region A"], { | |
29 | + where: { | |
30 | + territory: { | |
31 | + geo_shape: { | |
32 | + type: "envelope", | |
33 | + coordinates: [[28, 42], [32, 38]] | |
34 | + } | |
35 | + } | |
36 | + } | |
37 | + }, Region | |
38 | + | |
39 | + # envelope given as top_left / bottom_right corners instead of a coordinates array | |
40 | + assert_search "*", ["Region A"], { | |
41 | + where: { | |
42 | + territory: { | |
43 | + geo_shape: { | |
44 | + type: "envelope", | |
45 | + top_left: {lat: 42.0, lon: 28.0}, | |
46 | + bottom_right: {lat: 38.0, lon: 32.0} | |
47 | + } | |
48 | + } | |
49 | + } | |
50 | + }, Region | |
51 | + | |
52 | + # polygon: a single closed path (first and last points identical) | |
53 | + assert_search "*", ["Region A"], { | |
54 | + where: { | |
55 | + territory: { | |
56 | + geo_shape: { | |
57 | + type: "polygon", | |
58 | + coordinates: [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]] | |
59 | + } | |
60 | + } | |
61 | + } | |
62 | + }, Region | |
63 | + | |
64 | + # multipolygon: two polygons, expected to match Region A and Region B | |
65 | + assert_search "*", ["Region A", "Region B"], { | |
66 | + where: { | |
67 | + territory: { | |
68 | + geo_shape: { | |
69 | + type: "multipolygon", | |
70 | + coordinates: [ | |
71 | + [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]], | |
72 | + [[[58, 62], [62, 62], [62, 58], [58, 58], [58, 62]]] | |
73 | + ] | |
74 | + } | |
75 | + } | |
76 | + } | |
77 | + }, Region | |
78 | + | |
79 | + # relation "disjoint": regions that do not touch the envelope | |
80 | + assert_search "*", ["Region B", "Region C"], { | |
81 | + where: { | |
82 | + territory: { | |
83 | + geo_shape: { | |
84 | + type: "envelope", | |
85 | + relation: "disjoint", | |
86 | + coordinates: [[28, 42], [32, 38]] | |
87 | + } | |
88 | + } | |
89 | + } | |
90 | + }, Region | |
91 | + | |
92 | + # relation "within": regions lying entirely inside the envelope | |
93 | + assert_search "*", ["Region A"], { | |
94 | + where: { | |
95 | + territory: { | |
96 | + geo_shape: { | |
97 | + type: "envelope", | |
98 | + relation: "within", | |
99 | + coordinates: [[20, 50], [50, 20]] | |
100 | + } | |
101 | + } | |
102 | + } | |
103 | + }, Region | |
104 | + | |
105 | + # relation "contains": regions that fully contain the envelope | |
106 | + assert_search "*", ["Region A"], { | |
107 | + where: { | |
108 | + territory: { | |
109 | + geo_shape: { | |
110 | + type: "envelope", | |
111 | + relation: "contains", | |
112 | + coordinates: [[32, 33], [33, 32]] | |
113 | + } | |
114 | + } | |
115 | + } | |
116 | + }, Region | |
117 | + | |
118 | + # geo_shape filter combined with a text query | |
119 | + assert_search "witch", ["Region A"], { | |
120 | + where: { | |
121 | + territory: { | |
122 | + geo_shape: { | |
123 | + type: "envelope", | |
124 | + coordinates: [[28, 42], [32, 38]] | |
125 | + } | |
126 | + } | |
127 | + } | |
128 | + }, Region | |
129 | + | |
130 | + assert_search "ginger hair", [], { | |
131 | + where: { | |
132 | + territory: { | |
133 | + geo_shape: { | |
134 | + type: "envelope", | |
135 | + coordinates: [[28, 42], [32, 38]] | |
136 | + } | |
137 | + } | |
138 | + } | |
139 | + }, Region | |
140 | + | |
141 | + end | |
142 | + | |
143 | +end | ... | ... |
test/test_helper.rb
... | ... | @@ -93,6 +93,14 @@ if defined?(Mongoid) |
93 | 93 | field :name |
94 | 94 | end |
95 | 95 | |
96 | + class Region | |
97 | + include Mongoid::Document | |
98 | + | |
99 | + field :name | |
100 | + field :text | |
101 | + field :territory | |
102 | + end | |
103 | + | |
96 | 104 | class Speaker |
97 | 105 | include Mongoid::Document |
98 | 106 | |
... | ... | @@ -143,6 +151,15 @@ elsif defined?(NoBrainer) |
143 | 151 | field :name, type: String |
144 | 152 | end |
145 | 153 | |
154 | + class Region | |
155 | + include NoBrainer::Document | |
156 | + | |
157 | + field :id, type: Object | |
158 | + field :name, type: String | |
159 | + field :text, type: Text | |
160 | + field :territory, type: Text | |
161 | + end | |
162 | + | |
146 | 163 | class Speaker |
147 | 164 | include NoBrainer::Document |
148 | 165 | |
... | ... | @@ -234,6 +251,12 @@ else |
234 | 251 | t.string :name |
235 | 252 | end |
236 | 253 | |
254 | + ActiveRecord::Migration.create_table :regions do |t| | |
255 | + t.string :name | |
256 | + t.text :text | |
257 | + t.text :territory | |
258 | + end | |
259 | + | |
237 | 260 | ActiveRecord::Migration.create_table :speakers do |t| |
238 | 261 | t.string :name |
239 | 262 | end |
... | ... | @@ -250,6 +273,9 @@ else |
250 | 273 | has_many :products |
251 | 274 | end |
252 | 275 | |
276 | + class Region < ActiveRecord::Base | |
277 | + end | |
278 | + | |
253 | 279 | class Speaker < ActiveRecord::Base |
254 | 280 | end |
255 | 281 | |
... | ... | @@ -338,6 +364,33 @@ class Store |
338 | 364 | end |
339 | 365 | end |
340 | 366 | |
367 | +class Region | |
368 | + searchkick \ | |
369 | + geo_shapes: { | |
370 | + territory: {tree: "quadtree", precision: "10km"} | |
371 | + } | |
372 | + | |
373 | + def search_data | |
374 | + { | |
375 | + name: name, | |
376 | + text: text, | |
377 | + territory: as_geo_json | |
378 | + } | |
379 | + end | |
380 | + | |
381 | + def as_geo_json | |
382 | + { | |
383 | + type: "polygon", | |
384 | + coordinates: [territory_path] # outer array: a polygon is a list of paths; any paths after the first would be exclusions. | |
385 | + } | |
386 | + end | |
387 | + | |
388 | + def territory_path | |
389 | + path = territory.split(',').map(&:to_f).each_slice(2).to_a | |
390 | + path | |
391 | + end | |
392 | +end | |
393 | + | |
341 | 394 | class Speaker |
342 | 395 | searchkick \ |
343 | 396 | conversions: ["conversions_a", "conversions_b"] |
... | ... | @@ -369,6 +422,9 @@ Store.reindex |
369 | 422 | Animal.reindex |
370 | 423 | Speaker.reindex |
371 | 424 | |
425 | +Region.searchkick_index.delete if Region.searchkick_index.exists? | |
426 | +Region.reindex | |
427 | + | |
372 | 428 | class Minitest::Test |
373 | 429 | def setup |
374 | 430 | Product.destroy_all | ... | ... |