Class: PUIIndexer
Constant Summary
collapse
- PUI_RESOLVES =
AppConfig[:record_inheritance_resolves]
PeriodicIndexer::WORKER_STATUS_INDEX_ERROR, PeriodicIndexer::WORKER_STATUS_INDEX_SUCCESS, PeriodicIndexer::WORKER_STATUS_NOTHING_INDEXED
IndexerCommon::MAX_PENDING_DELETES
Class Method Summary
collapse
Instance Method Summary
collapse
#handle_deletes, #log, #run, #run_index_round, #start_worker_thread
#add_agents, #add_arks, add_attribute_to_resolve, #add_audit_info, #add_batch_hook, #add_delete_hook, #add_document_prepare_hook, #add_extents, #add_extra_documents_hook, add_indexer_initialize_hook, #add_level, #add_subjects, #add_subjects_subrecord, #add_summary, #add_years, #apply_pui_fields, #clean_for_sort, #clean_whitespace, #dedupe_by_uri, #delete_records, #do_http_request, #enum_fields, extract_string_values, generate_permutations_for_identifier, generate_sort_string_for_identifier, generate_years_for_date_range, #get_record_scope, #index_batch, #index_records, #is_repository_unpublished?, #login, pause, paused?, #paused?, #record_has_children, #records_with_children, #reset_session, #sanitize_json, #send_commit, #solr_url, #trim_ark_value
Constructor Details
#initialize(backend = nil, state = nil, name) ⇒ PUIIndexer
Returns a new instance of PUIIndexer.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
# File 'indexer/app/lib/pui_indexer.rb', line 12
# Builds a PUI indexer and reads its tuning values from AppConfig.
#
# @param backend passed through unchanged to the superclass initializer
# @param state index state to use; when nil, a new instance of the class
#   named by AppConfig[:index_state_class] is created for "indexer_pui_state"
# @param name passed through unchanged to the superclass initializer
def initialize(backend = nil, state = nil, name)
  # The configured state class is resolved up front, whether or not a state
  # was supplied by the caller.
  default_state_class = AppConfig[:index_state_class].constantize
  effective_state = state || default_state_class.new("indexer_pui_state")

  super(backend, effective_state, name)

  RecordInheritance.prepare_schemas

  @time_to_sleep = AppConfig[:pui_indexing_frequency_seconds].to_i
  @thread_count = AppConfig[:pui_indexer_thread_count].to_i
  @records_per_thread = AppConfig[:pui_indexer_records_per_thread].to_i

  # IDs staged for deletion are kept in a Java synchronized list.
  @unpublished_records = java.util.Collections.synchronizedList(java.util.ArrayList.new)
end
|
Class Method Details
.get_indexer(state = nil, name = "PUI Indexer") ⇒ Object
37
38
39
|
# File 'indexer/app/lib/pui_indexer.rb', line 37
# Factory for a PUI indexer.
#
# @param state index state handed to the constructor's `state` parameter
#   (nil lets the constructor build its default state)
# @param name display name handed to the constructor's `name` parameter
# @return [PUIIndexer] the new indexer
def self.get_indexer(state = nil, name = "PUI Indexer")
  # The constructor signature is (backend = nil, state = nil, name). With only
  # two arguments Ruby binds the first one to `backend`, so the state would
  # never reach the `state` slot. Pass the backend explicitly as nil (its
  # default) to keep every argument in the intended position.
  self.new(nil, state, name)
end
|
Instance Method Details
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# File 'indexer/app/lib/pui_indexer.rb', line 126
# Appends one "resource_ordered_records" document per resource URI to the batch.
#
# For each URI the "<uri>/ordered_records" endpoint is fetched and its payload
# is stored, serialized, in the document's 'json' field.
#
# @param resource_uris [Array<String>] resource URIs to build documents for
# @param batch [#<<] collection receiving the generated documents
# @return [Array<String>] resource_uris, as returned by `each`
def add_infscroll_docs(resource_uris, batch)
  resource_uris.each do |resource_uri|
    ordered_records_uri = resource_uri + '/ordered_records'
    ordered_records = JSONModel::HTTP.get_json(ordered_records_uri)

    document = {
      'id' => ordered_records_uri,
      'uri' => ordered_records_uri,
      'pui_parent_id' => resource_uri,
      'publish' => "true",
      'primary_type' => "resource_ordered_records",
      'types' => ['pui'],
      'json' => ASUtils.to_json(ordered_records)
    }

    batch << document
  end
end
|
#add_notes(doc, record) ⇒ Object
120
121
122
123
124
|
# File 'indexer/app/lib/pui_indexer.rb', line 120
# Stores the string content of a record's published notes on the document.
#
# @param doc [Hash] document being built; 'notes_published' is set on it
# @param record [Hash] wrapper whose 'record' entry holds the record data
# @return the value assigned to doc['notes_published'], or nil when the
#   record has no notes (the document is then left untouched)
def add_notes(doc, record)
  notes = record['record']['notes']
  return unless notes

  # A bare `IndexerCommon.(...)` dispatches to `IndexerCommon.call`, which is
  # not the intended helper; name the string extractor explicitly.
  doc['notes_published'] = IndexerCommon.extract_string_values(notes, :published_only)
end
|
#build_fullrecord(doc, record) ⇒ Object
116
117
118
|
# File 'indexer/app/lib/pui_indexer.rb', line 116
# Stores the published string content of the whole record on the document.
#
# @param doc [Hash] document being built; 'fullrecord_published' is set on it
# @param record [Hash] wrapper whose 'record' entry holds the record data
# @return the value assigned to doc['fullrecord_published']
def build_fullrecord(doc, record)
  # A bare `IndexerCommon.(...)` dispatches to `IndexerCommon.call`, which is
  # not the intended helper; name the string extractor explicitly.
  doc['fullrecord_published'] = IndexerCommon.extract_string_values(record['record'], :published_only)
end
|
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
# File 'indexer/app/lib/pui_indexer.rb', line 51
# Extends the inherited document rules with the PUI-specific ones.
#
# After running the superclass rules, registers the record types passed to
# record_has_children and adds a prepare hook that turns documents of
# inheriting record types into PUI-only documents: the id gets a "#pui"
# suffix, the original id is kept in 'pui_parent_id', and the PUI type
# markers are appended to 'types'.
def configure_doc_rules
  super

  ['resource',
   'archival_object',
   'digital_object',
   'digital_object_component',
   'classification',
   'classification_term'].each do |record_type|
    record_has_children(record_type)
  end

  add_document_prepare_hook do |doc, _record|
    # Only record types known to RecordInheritance get a PUI variant.
    next unless RecordInheritance.has_type?(doc['primary_type'])

    original_id = doc['id']
    doc['id'] = "#{original_id}#pui"
    doc['pui_parent_id'] = original_id

    type_list = (doc['types'] ||= [])
    type_list << 'pui'
    type_list << "pui_#{doc['primary_type']}"
    type_list << 'pui_record'
    type_list << 'pui_only'
  end
end
|
#fetch_records(type, ids, resolve) ⇒ Object
28
29
30
31
32
33
34
35
|
# File 'indexer/app/lib/pui_indexer.rb', line 28
# Fetches the records with the given ids and, for inheriting record types,
# runs them through RecordInheritance.merge with :direct_only => true.
#
# @param type record type handed to JSONModel
# @param ids [Array] ids to fetch; sent comma-joined as :id_set
# @param resolve value sent as the 'resolve[]' parameter
# @return the fetched records, merged when the type supports inheritance
def fetch_records(type, ids, resolve)
  model = JSONModel(type)
  fetched = model.all(:id_set => ids.join(","), 'resolve[]' => resolve)

  return fetched unless RecordInheritance.has_type?(type)

  RecordInheritance.merge(fetched, :direct_only => true)
end
|
#final_doc_rules ⇒ Object
Run the final doc rules after all the hooks have been added.
This allows plugins to access ancestor data in PUI records before it is removed.
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
# File 'indexer/app/lib/pui_indexer.rb', line 79
# Registers the last document-prepare hook for inheriting record types.
#
# The hook serializes the merged record (ancestors removed) into doc['json'],
# copies the title when present, drops 'ancestors' from the record, strips
# the top container's 'internal_note' and the container profile's 'notes'
# from resolved instances, and finally builds the full-record field.
def final_doc_rules
  add_document_prepare_hook do |doc, record|
    next unless RecordInheritance.has_type?(doc['primary_type'])

    data = record['record']

    # Serialized before 'ancestors' is deleted from the record below.
    merged = RecordInheritance.merge(data, :remove_ancestors => true)
    doc['json'] = ASUtils.to_json(merged)

    doc['title'] = data['title'] if data['title']
    data.delete('ancestors')

    (data['instances'] || []).each do |instance|
      sub_container = instance['sub_container']
      top_container = sub_container && sub_container['top_container']
      next unless top_container

      resolved_container = top_container['_resolved']
      next unless resolved_container

      resolved_container.delete('internal_note')

      profile = resolved_container['container_profile']
      resolved_profile = profile && profile['_resolved']
      resolved_profile.delete('notes') if resolved_profile
    end

    build_fullrecord(doc, record)
  end
end
|
#index_round_complete(repository) ⇒ Object
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
|
# File 'indexer/app/lib/pui_indexer.rb', line 159
# Post-round work for one repository: rebuilds the tree-related PUI documents
# for every tree whose root or nodes changed since the last 'tree' checkpoint,
# then processes pending deletions.
#
# @param repository repository just indexed; its `id` and `repo_code` are read
# @return [Array] the checkpoint list, as returned by `each`
def index_round_complete(repository)
  start = Time.now
  state_type = 'tree'
  checkpoints = []
  tree_uris = []
  update_mtimes = false

  # Each pair is [root record type, node record type].
  [[:resource, :archival_object],
   [:digital_object, :digital_object_component],
   [:classification, :classification_term]].each do |root_type, node_type|
    checkpoints << [repository, root_type, start]
    checkpoints << [repository, node_type, start]

    # Step back by @window_seconds from the stored mtime, never below zero.
    root_since = [@state.get_last_mtime(repository.id, root_type, state_type) - @window_seconds, 0].max
    node_since = [@state.get_last_mtime(repository.id, node_type, state_type) - @window_seconds, 0].max

    root_ids = Set.new(JSONModel::HTTP.get_json(JSONModel(root_type).uri_for,
                                                :all_ids => true,
                                                :modified_since => root_since))
    changed_node_ids = JSONModel::HTTP.get_json(JSONModel(node_type).uri_for,
                                                :all_ids => true,
                                                :modified_since => node_since)

    # A changed node marks its root for rebuilding as well.
    changed_node_ids.each_slice(@records_per_thread) do |id_slice|
      nodes = JSONModel(node_type).all(:id_set => id_slice.join(","), 'resolve[]' => [])
      nodes.each do |node|
        root_ref = node[root_type.to_s]['ref']
        root_ids << JSONModel.parse_reference(root_ref).fetch(:id)
      end
    end

    root_ids.each {|id| tree_uris << JSONModel(root_type).uri_for(id) }
  end

  batch = IndexBatch.new

  # Ordered-records documents are only built for resources.
  resource_uris = tree_uris.select {|uri| JSONModel.parse_reference(uri).fetch(:type) == 'resource' }
  add_infscroll_docs(resource_uris, batch)

  tree_indexer = LargeTreeDocIndexer.new(batch)
  tree_indexer.add_largetree_docs(tree_uris)

  if batch.length > 0
    log "Indexed #{batch.length} additional PUI records in repository #{repository.repo_code}"
    index_batch(batch, nil, :parent_id_field => 'pui_parent_id')
    send_commit
    update_mtimes = true
  end

  if tree_indexer.deletes.length > 0
    tree_indexer.deletes.each_slice(100) do |stale_ids|
      delete_records(stale_ids, :parent_id_field => 'pui_parent_id')
    end
  end

  handle_deletes(:parent_id_field => 'pui_parent_id')

  # Remove the documents staged while skipping unpublished records.
  delete_records(@unpublished_records, :parent_id_field => 'pui_parent_id')
  @unpublished_records.clear()

  # Checkpoints only move forward when a batch was actually indexed.
  checkpoints.each do |checkpoint_repo, record_type, checkpoint_time|
    @state.set_last_mtime(checkpoint_repo.id, record_type, checkpoint_time, state_type) if update_mtimes
  end
end
|
#record_types ⇒ Object
45
46
47
48
49
|
# File 'indexer/app/lib/pui_indexer.rb', line 45
# Record types handled by this indexer: the inherited types that
# RecordInheritance knows about, plus :archival_object, without duplicates.
#
# @return [Array] the de-duplicated list of record types
def record_types
  inheriting_types = super.select {|type| RecordInheritance.has_type?(type) }
  inheriting_types | [:archival_object]
end
|
#repositories_updated_action(updated_repositories) ⇒ Object
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
|
# File 'indexer/app/lib/pui_indexer.rb', line 234
# For every updated repository that is not published, asks Solr to delete the
# repository's PUI-only documents and logs the outcome.
#
# @param updated_repositories [Array<Hash>] entries with a 'uri' and a
#   'record' hash carrying 'publish' and 'repo_code'
# @return [Array<Hash>] updated_repositories, as returned by `each`
def repositories_updated_action(updated_repositories)
  updated_repositories.each do |repository|
    # Published repositories keep their PUI documents.
    next if repository['record']['publish']

    request = Net::HTTP::Post.new("#{solr_url.path}/update")
    request['Content-Type'] = 'application/json'

    solr_query = "repository:\"#{repository['uri']}\" AND types:pui_only"
    request.body = {:delete => {'query' => solr_query}}.to_json

    response = do_http_request(solr_url, request)

    unless response.code == '200'
      Log.error "SolrIndexerError when deleting PUI-only records in private repository #{repository['record']['repo_code']}: #{response.body}"
      next
    end

    Log.info "Deleted PUI-only documents in private repository #{repository['record']['repo_code']}: #{response}"
  end
end
|
#resolved_attributes ⇒ Object
41
42
43
|
# File 'indexer/app/lib/pui_indexer.rb', line 41
# Attributes to resolve when fetching records: the inherited list followed by
# the PUI-specific entries in PUI_RESOLVES.
#
# @return the combined list (a new object; the inherited list is not mutated
#   when it is an Array)
def resolved_attributes
  inherited = super
  inherited + PUI_RESOLVES
end
|
#skip_index_doc?(doc) ⇒ Boolean
151
152
153
154
155
156
157
|
# File 'indexer/app/lib/pui_indexer.rb', line 151
# Decides whether a document should be left out of the index.
#
# Documents whose 'publish' value is falsy are skipped, and their id is
# handed to stage_unpublished_for_deletion.
#
# @param doc [Hash] document with 'publish' and 'id' entries
# @return [Boolean] true when the document is not published
def skip_index_doc?(doc)
  unpublished = !doc['publish']
  stage_unpublished_for_deletion(doc['id']) if unpublished
  unpublished
end
|
#skip_index_record?(record) ⇒ Boolean
142
143
144
145
146
147
148
|
# File 'indexer/app/lib/pui_indexer.rb', line 142
# Decides whether a record should be left out of the index.
#
# Records whose 'publish' value is falsy are skipped, and the "#pui" id
# derived from the record's URI is handed to stage_unpublished_for_deletion.
#
# @param record [Hash] wrapper whose 'record' entry holds 'publish' and 'uri'
# @return [Boolean] true when the record is not published
def skip_index_record?(record)
  unpublished = !record['record']['publish']
  stage_unpublished_for_deletion("#{record['record']['uri']}#pui") if unpublished
  unpublished
end
|
#stage_unpublished_for_deletion(doc_id) ⇒ Object
230
231
232
|
# File 'indexer/app/lib/pui_indexer.rb', line 230
# Remembers a document id for later deletion, but only when the id ends in
# "#pui"; any other id is ignored.
#
# @param doc_id [String] document id to consider
# @return the result of adding to @unpublished_records, or nil when ignored
def stage_unpublished_for_deletion(doc_id)
  return unless doc_id =~ /#pui$/

  @unpublished_records.add(doc_id)
end
|