Commit 40049809f48ce41c2a2aa69528fc315426da2429
1 parent
326e97bb
Exists in
master
and in
21 other branches
geo_shape support in index and query, without much sugar
Showing
5 changed files
with
319 additions
and
2 deletions
Show diff stats
README.md
@@ -834,7 +834,9 @@ product = Product.first | @@ -834,7 +834,9 @@ product = Product.first | ||
834 | product.similar(fields: ["name"], where: {size: "12 oz"}) | 834 | product.similar(fields: ["name"], where: {size: "12 oz"}) |
835 | ``` | 835 | ``` |
836 | 836 | ||
837 | -### Geospatial Searches | 837 | +### Simple Geospatial Indexing: geo_points |
838 | + | ||
839 | +If your data consists of point values, searchkick offers a useful shorthand: | ||
838 | 840 | ||
839 | ```ruby | 841 | ```ruby |
840 | class City < ActiveRecord::Base | 842 | class City < ActiveRecord::Base |
@@ -846,7 +848,9 @@ class City < ActiveRecord::Base | @@ -846,7 +848,9 @@ class City < ActiveRecord::Base | ||
846 | end | 848 | end |
847 | ``` | 849 | ``` |
848 | 850 | ||
849 | -Reindex and search with: | 851 | +Elasticsearch supports a range of useful search types for geo_point data: |
852 | + | ||
853 | +Within a radius | ||
850 | 854 | ||
851 | ```ruby | 855 | ```ruby |
852 | City.search "san", where: {location: {near: {lat: 37, lon: -114}, within: "100mi"}} # or 160km | 856 | City.search "san", where: {location: {near: {lat: 37, lon: -114}, within: "100mi"}} # or 160km |
@@ -878,6 +882,81 @@ Also supports [additional options](https://www.elastic.co/guide/en/elasticsearch | @@ -878,6 +882,81 @@ Also supports [additional options](https://www.elastic.co/guide/en/elasticsearch | ||
878 | City.search "san", boost_by_distance: {field: :location, origin: {lat: 37, lon: -122}, function: :linear, scale: "30mi", decay: 0.5} | 882 | City.search "san", boost_by_distance: {field: :location, origin: {lat: 37, lon: -122}, function: :linear, scale: "30mi", decay: 0.5} |
879 | ``` | 883 | ``` |
880 | 884 | ||
885 | + | ||
886 | +### Complex Geospatial Indexing: geo_shapes | ||
887 | + | ||
888 | +You can also pass through complex or varied shapes as GeoJSON objects. | ||
889 | + | ||
890 | +```ruby | ||
891 | +class City < ActiveRecord::Base | ||
892 | + searchkick geo_shapes: { | ||
893 | + bounds: {tree: "geohash", precision: "1km"}, | ||
894 | + perimeter: {tree: "quadtree", precision: "10m"} | ||
895 | + } | ||
896 | + | ||
897 | + def search_data | ||
898 | + attributes.merge( | ||
899 | + bounds: { | ||
900 | + type: "envelope", | ||
901 | + coordinates: [{lat: 4, lon: 1}, {lat: 2, lon: 3}] | ||
902 | + }, | ||
903 | + perimeter: { | ||
904 | + type: "polygon", | ||
905 | + coordinates: [[{lat: 1, lon: 2}, {lat: 3, lon: 4}, {lat: 5, lon: 6}, ...]] | ||
906 | + } | ||
907 | + ) | ||
908 | + end | ||
909 | +end | ||
910 | +``` | ||
911 | + | ||
912 | +The `geo_shapes` hash is passed through to elasticsearch without modification. Please see the [geo_shape data type documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for options. | ||
913 | + | ||
914 | +Any geospatial data type can be used in the index or in the search. It is up to you to ensure that it is a valid geoJSON representation. The possible shapes are: | ||
915 | + | ||
916 | +* **point**: single lat/lon pair | ||
917 | +* **multipoint**: array of points | ||
918 | +* **linestring**: array of at least two lat/lon pairs | ||
919 | +* **multilinestring**: array of lines | ||
920 | +* **polygon**: an array of paths, each being an array of at least four lat/lon pairs whose first and last points are the same. Paths after the first represent exclusions. | ||
921 | +* **multipolygon**: array of polygons | ||
922 | +* **envelope**: a bounding box defined by top left and bottom right points | ||
923 | +* **circle**: a bounding circle defined by center point and radius | ||
924 | +* **geometrycollection**: an array of separate geoJSON objects possibly of various types | ||
925 | + | ||
926 | +See the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for details. GeoJSON coordinates are usually given as an array of `[lon, lat]` points but this often causes swapping errors so searchkick can also take objects with `lon` and `lat` keys. | ||
927 | + | ||
928 | +Elasticsearch is sensitive about geo_shape validity. For example it will throw an exception if a polygon contains two consecutive identical points, or is not properly closed. You probably want to validate the data during indexing. | ||
929 | + | ||
930 | + | ||
931 | +### Geospatial searching | ||
932 | + | ||
933 | +Once a geo_shape index is established, you can apply a geo_shape filter to any search. The filter also takes a GeoJSON shape and returns the items whose indexed shape has the requested `relation` to it. Pass `relation` inside the `geo_shape` hash; it defaults to `"intersects"`. | ||
934 | + | ||
935 | +Intersecting with the query shape: | ||
936 | + | ||
937 | +```ruby | ||
938 | +City.search "san", where: {bounds: {geo_shape: {type: "polygon", coordinates: [[{lat: 38, lon: -123}, ...]]}}} | ||
939 | +``` | ||
940 | + | ||
941 | +Falling entirely within the query shape: | ||
942 | + | ||
943 | +```ruby | ||
944 | +City.search "san", where: {bounds: {geo_shape: {type: "circle", relation: "within", coordinates: {lat: 38, lon: -123}, radius: "1km"}}} | ||
945 | +``` | ||
946 | + | ||
947 | +Not touching the query shape: | ||
948 | + | ||
949 | +```ruby | ||
950 | +City.search "san", where: {bounds: {geo_shape: {type: "envelope", relation: "disjoint", coordinates: [{lat: 38, lon: -123}, {lat: 37, lon: -122}]}}} | ||
951 | +``` | ||
952 | + | ||
953 | +Envelope is a special case. For consistency, searchkick also understands coordinates given as top_left and bottom_right: | ||
954 | + | ||
955 | +```ruby | ||
956 | +City.search "san", where: {bounds: {geo_shape: {type: "envelope", relation: "within", top_left: {lat: 38, lon: -123}, bottom_right: {lat: 37, lon: -122}}}} | ||
957 | +``` | ||
958 | + | ||
959 | + | ||
881 | ### Routing | 960 | ### Routing |
882 | 961 | ||
883 | Searchkick supports [Elasticsearch's routing feature](https://www.elastic.co/blog/customizing-your-document-routing). | 962 | Searchkick supports [Elasticsearch's routing feature](https://www.elastic.co/blog/customizing-your-document-routing). |
lib/searchkick/index_options.rb
@@ -281,6 +281,11 @@ module Searchkick | @@ -281,6 +281,11 @@ module Searchkick | ||
281 | } | 281 | } |
282 | end | 282 | end |
283 | 283 | ||
284 | + options[:geo_shapes] = options[:geo_shapes].product([{}]).to_h if options[:geo_shapes].is_a? Array | ||
285 | + (options[:geo_shapes] || {}).each do |field, shape_options| | ||
286 | + mapping[field] = shape_options.merge(type: "geo_shape") | ||
287 | + end | ||
288 | + | ||
284 | (options[:unsearchable] || []).map(&:to_s).each do |field| | 289 | (options[:unsearchable] || []).map(&:to_s).each do |field| |
285 | mapping[field] = { | 290 | mapping[field] = { |
286 | type: default_type, | 291 | type: default_type, |
lib/searchkick/query.rb
@@ -808,6 +808,25 @@ module Searchkick | @@ -808,6 +808,25 @@ module Searchkick | ||
808 | field => op_value | 808 | field => op_value |
809 | } | 809 | } |
810 | } | 810 | } |
811 | + when :geo_shape | ||
812 | + if op_value[:type] == "envelope" && op_value[:top_left].present? && op_value[:bottom_right].present? | ||
813 | + op_value[:coordinates] = [coordinate_array(op_value[:top_left]), coordinate_array(op_value[:bottom_right])] | ||
814 | + op_value.delete(:top_left) | ||
815 | + op_value.delete(:bottom_right) | ||
816 | + elsif op_value[:type] == "circle" | ||
817 | + op_value[:coordinates] = coordinate_array(op_value[:coordinates] || []) | ||
818 | + else | ||
819 | + op_value[:coordinates] = (op_value[:coordinates] || []).map { |loc| coordinate_array(loc) } | ||
820 | + end | ||
821 | + relation = op_value.delete(:relation) || 'intersects' | ||
822 | + filters << { | ||
823 | + geo_shape: { | ||
824 | + field => { | ||
825 | + relation: relation, | ||
826 | + shape: op_value | ||
827 | + } | ||
828 | + } | ||
829 | + } | ||
811 | when :top_left | 830 | when :top_left |
812 | filters << { | 831 | filters << { |
813 | geo_bounding_box: { | 832 | geo_bounding_box: { |
@@ -923,6 +942,21 @@ module Searchkick | @@ -923,6 +942,21 @@ module Searchkick | ||
923 | end | 942 | end |
924 | end | 943 | end |
925 | 944 | ||
945 | + # Recursively descend through nesting of arrays until we reach either a lat/lon object or an array of numbers, | ||
946 | + # eventually returning the same structure with all values transformed to [lon, lat]. Question: should we reverse | ||
947 | + # the array order so that arguments can be given as [lat, lon], as happens elsewhere in searchkick? We are moving | ||
948 | + # GeoJSON around so it seems better to stick to that specification, though the lat/lon objects are already a deviation. | ||
949 | + # | ||
950 | + def coordinate_array(value) | ||
951 | + if value.is_a?(Hash) | ||
952 | + [value[:lon], value[:lat]] | ||
953 | + elsif value.is_a?(Array) and !value[0].is_a?(Numeric) | ||
954 | + value.map {|a| coordinate_array(a) } | ||
955 | + else | ||
956 | + value | ||
957 | + end | ||
958 | + end | ||
959 | + | ||
926 | def location_value(value) | 960 | def location_value(value) |
927 | if value.is_a?(Array) | 961 | if value.is_a?(Array) |
928 | value.map(&:to_f).reverse | 962 | value.map(&:to_f).reverse |
@@ -0,0 +1,143 @@ | @@ -0,0 +1,143 @@ | ||
1 | +require "pp" | ||
2 | +require_relative "test_helper" | ||
3 | + | ||
4 | +class GeoShapeTest < Minitest::Test | ||
5 | + | ||
6 | + def test_geo_shape | ||
7 | + regions = [ | ||
8 | + {name: "Region A", text: "The witch had a cat", territory: "30,40,35,45,40,40,40,30,30,30,30,40"}, | ||
9 | + {name: "Region B", text: "and a very tall hat", territory: "50,60,55,65,60,60,60,50,50,50,50,60"}, | ||
10 | + {name: "Region C", text: "and long ginger hair which she wore in a plait.", territory: "10,20,15,25,20,20,20,10,10,10,10,20"}, | ||
11 | + ] | ||
12 | + store regions, Region | ||
13 | + | ||
14 | + # circle | ||
15 | + assert_search "*", ["Region A"], { | ||
16 | + where: { | ||
17 | + territory: { | ||
18 | + geo_shape: { | ||
19 | + type: "circle", | ||
20 | + coordinates: {lat: 28.0, lon: 38.0}, | ||
21 | + radius: "444000m" | ||
22 | + } | ||
23 | + } | ||
24 | + } | ||
25 | + }, Region | ||
26 | + | ||
27 | + # envelope | ||
28 | + assert_search "*", ["Region A"], { | ||
29 | + where: { | ||
30 | + territory: { | ||
31 | + geo_shape: { | ||
32 | + type: "envelope", | ||
33 | + coordinates: [[28, 42], [32, 38]] | ||
34 | + } | ||
35 | + } | ||
36 | + } | ||
37 | + }, Region | ||
38 | + | ||
39 | + # envelope as corners | ||
40 | + assert_search "*", ["Region A"], { | ||
41 | + where: { | ||
42 | + territory: { | ||
43 | + geo_shape: { | ||
44 | + type: "envelope", | ||
45 | + top_left: {lat: 42.0, lon: 28.0}, | ||
46 | + bottom_right: {lat: 38.0, lon: 32.0} | ||
47 | + } | ||
48 | + } | ||
49 | + } | ||
50 | + }, Region | ||
51 | + | ||
52 | + # polygon | ||
53 | + assert_search "*", ["Region A"], { | ||
54 | + where: { | ||
55 | + territory: { | ||
56 | + geo_shape: { | ||
57 | + type: "polygon", | ||
58 | + coordinates: [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]] | ||
59 | + } | ||
60 | + } | ||
61 | + } | ||
62 | + }, Region | ||
63 | + | ||
64 | + # multipolygon | ||
65 | + assert_search "*", ["Region A", "Region B"], { | ||
66 | + where: { | ||
67 | + territory: { | ||
68 | + geo_shape: { | ||
69 | + type: "multipolygon", | ||
70 | + coordinates: [ | ||
71 | + [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]], | ||
72 | + [[[58, 62], [62, 62], [62, 58], [58, 58], [58, 62]]] | ||
73 | + ] | ||
74 | + } | ||
75 | + } | ||
76 | + } | ||
77 | + }, Region | ||
78 | + | ||
79 | + # disjoint | ||
80 | + assert_search "*", ["Region B", "Region C"], { | ||
81 | + where: { | ||
82 | + territory: { | ||
83 | + geo_shape: { | ||
84 | + type: "envelope", | ||
85 | + relation: "disjoint", | ||
86 | + coordinates: [[28, 42], [32, 38]] | ||
87 | + } | ||
88 | + } | ||
89 | + } | ||
90 | + }, Region | ||
91 | + | ||
92 | + # within | ||
93 | + assert_search "*", ["Region A"], { | ||
94 | + where: { | ||
95 | + territory: { | ||
96 | + geo_shape: { | ||
97 | + type: "envelope", | ||
98 | + relation: "within", | ||
99 | + coordinates: [[20, 50], [50, 20]] | ||
100 | + } | ||
101 | + } | ||
102 | + } | ||
103 | + }, Region | ||
104 | + | ||
105 | + # contains | ||
106 | + assert_search "*", ["Region A"], { | ||
107 | + where: { | ||
108 | + territory: { | ||
109 | + geo_shape: { | ||
110 | + type: "envelope", | ||
111 | + relation: "contains", | ||
112 | + coordinates: [[32, 33], [33, 32]] | ||
113 | + } | ||
114 | + } | ||
115 | + } | ||
116 | + }, Region | ||
117 | + | ||
118 | + # with search | ||
119 | + assert_search "witch", ["Region A"], { | ||
120 | + where: { | ||
121 | + territory: { | ||
122 | + geo_shape: { | ||
123 | + type: "envelope", | ||
124 | + coordinates: [[28, 42], [32, 38]] | ||
125 | + } | ||
126 | + } | ||
127 | + } | ||
128 | + }, Region | ||
129 | + | ||
130 | + assert_search "ginger hair", [], { | ||
131 | + where: { | ||
132 | + territory: { | ||
133 | + geo_shape: { | ||
134 | + type: "envelope", | ||
135 | + coordinates: [[28, 42], [32, 38]] | ||
136 | + } | ||
137 | + } | ||
138 | + } | ||
139 | + }, Region | ||
140 | + | ||
141 | + end | ||
142 | + | ||
143 | +end |
test/test_helper.rb
@@ -93,6 +93,14 @@ if defined?(Mongoid) | @@ -93,6 +93,14 @@ if defined?(Mongoid) | ||
93 | field :name | 93 | field :name |
94 | end | 94 | end |
95 | 95 | ||
96 | + class Region | ||
97 | + include Mongoid::Document | ||
98 | + | ||
99 | + field :name | ||
100 | + field :text | ||
101 | + field :territory | ||
102 | + end | ||
103 | + | ||
96 | class Speaker | 104 | class Speaker |
97 | include Mongoid::Document | 105 | include Mongoid::Document |
98 | 106 | ||
@@ -143,6 +151,15 @@ elsif defined?(NoBrainer) | @@ -143,6 +151,15 @@ elsif defined?(NoBrainer) | ||
143 | field :name, type: String | 151 | field :name, type: String |
144 | end | 152 | end |
145 | 153 | ||
154 | + class Region | ||
155 | + include NoBrainer::Document | ||
156 | + | ||
157 | + field :id, type: Object | ||
158 | + field :name, type: String | ||
159 | + field :text, type: Text | ||
160 | + field :territory, type: Text | ||
161 | + end | ||
162 | + | ||
146 | class Speaker | 163 | class Speaker |
147 | include NoBrainer::Document | 164 | include NoBrainer::Document |
148 | 165 | ||
@@ -234,6 +251,12 @@ else | @@ -234,6 +251,12 @@ else | ||
234 | t.string :name | 251 | t.string :name |
235 | end | 252 | end |
236 | 253 | ||
254 | + ActiveRecord::Migration.create_table :regions do |t| | ||
255 | + t.string :name | ||
256 | + t.text :text | ||
257 | + t.text :territory | ||
258 | + end | ||
259 | + | ||
237 | ActiveRecord::Migration.create_table :speakers do |t| | 260 | ActiveRecord::Migration.create_table :speakers do |t| |
238 | t.string :name | 261 | t.string :name |
239 | end | 262 | end |
@@ -250,6 +273,9 @@ else | @@ -250,6 +273,9 @@ else | ||
250 | has_many :products | 273 | has_many :products |
251 | end | 274 | end |
252 | 275 | ||
276 | + class Region < ActiveRecord::Base | ||
277 | + end | ||
278 | + | ||
253 | class Speaker < ActiveRecord::Base | 279 | class Speaker < ActiveRecord::Base |
254 | end | 280 | end |
255 | 281 | ||
@@ -338,6 +364,33 @@ class Store | @@ -338,6 +364,33 @@ class Store | ||
338 | end | 364 | end |
339 | end | 365 | end |
340 | 366 | ||
367 | +class Region | ||
368 | + searchkick \ | ||
369 | + geo_shapes: { | ||
370 | + territory: {tree: "quadtree", precision: "10km"} | ||
371 | + } | ||
372 | + | ||
373 | + def search_data | ||
374 | + { | ||
375 | + name: name, | ||
376 | + text: text, | ||
377 | + territory: as_geo_json | ||
378 | + } | ||
379 | + end | ||
380 | + | ||
381 | + def as_geo_json | ||
382 | + { | ||
383 | + type: "polygon", | ||
384 | + coordinates: [territory_path] # enclosing array because polygon can also have exclusion paths. | ||
385 | + } | ||
386 | + end | ||
387 | + | ||
388 | + def territory_path | ||
389 | + path = territory.split(',').map(&:to_f).each_slice(2).to_a | ||
390 | + path | ||
391 | + end | ||
392 | +end | ||
393 | + | ||
341 | class Speaker | 394 | class Speaker |
342 | searchkick \ | 395 | searchkick \ |
343 | conversions: ["conversions_a", "conversions_b"] | 396 | conversions: ["conversions_a", "conversions_b"] |
@@ -369,6 +422,9 @@ Store.reindex | @@ -369,6 +422,9 @@ Store.reindex | ||
369 | Animal.reindex | 422 | Animal.reindex |
370 | Speaker.reindex | 423 | Speaker.reindex |
371 | 424 | ||
425 | +Region.searchkick_index.delete if Region.searchkick_index.exists? | ||
426 | +Region.reindex | ||
427 | + | ||
372 | class Minitest::Test | 428 | class Minitest::Test |
373 | def setup | 429 | def setup |
374 | Product.destroy_all | 430 | Product.destroy_all |