Class: PUIIndexer
Constant Summary
collapse
PUI_RESOLVES = AppConfig[:record_inheritance_resolves]
Constants inherited from PeriodicIndexer
PeriodicIndexer::WORKER_STATUS_INDEX_ERROR, PeriodicIndexer::WORKER_STATUS_INDEX_SUCCESS, PeriodicIndexer::WORKER_STATUS_NOTHING_INDEXED
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from PeriodicIndexer
#handle_deletes, #log, #run, #run_index_round, #start_worker_thread
Methods inherited from IndexerCommon
#add_agents, #add_arks, add_attribute_to_resolve, #add_audit_info, #add_batch_hook, #add_delete_hook, #add_document_prepare_hook, #add_extents, #add_extra_documents_hook, add_indexer_initialize_hook, #add_level, #add_subjects, #add_subjects_subrecord, #add_summary, #add_years, #apply_pui_fields, #clean_for_sort, #clean_whitespace, #dedupe_by_uri, #delete_records, #do_http_request, #enum_fields, extract_string_values, generate_permutations_for_identifier, generate_sort_string_for_identifier, generate_years_for_date_range, #get_record_scope, #index_batch, #index_records, #is_repository_unpublished?, #login, pause, paused?, #paused?, #record_has_children, #records_with_children, #reset_session, #sanitize_json, #send_commit, #solr_url, #trim_ark_value
Constructor Details
#initialize(backend = nil, state = nil, name) ⇒ PUIIndexer
Returns a new instance of PUIIndexer.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
# File 'indexer/app/lib/pui_indexer.rb', line 12
# Builds a PUI indexer.
#
# @param backend passed through unchanged to the superclass constructor
# @param state index-state object to use; when nil, a new instance of the
#   class named by AppConfig[:index_state_class] is created with the key
#   "indexer_pui_state"
# @param name indexer name, passed through to the superclass constructor
def initialize(backend = nil, state = nil, name)
  # The configured state class is resolved up front, even when a state
  # object was supplied by the caller.
  default_state_class = AppConfig[:index_state_class].constantize
  effective_state = state || default_state_class.new("indexer_pui_state")

  super(backend, effective_state, name)

  RecordInheritance.prepare_schemas

  # PUI-specific tuning values, read from the application configuration.
  @records_per_thread = AppConfig[:pui_indexer_records_per_thread].to_i
  @thread_count = AppConfig[:pui_indexer_thread_count].to_i
  @time_to_sleep = AppConfig[:pui_indexing_frequency_seconds].to_i

  # Document ids staged by stage_unpublished_for_deletion; wrapped in a
  # Java synchronized list.
  @unpublished_records = java.util.Collections.synchronizedList(java.util.ArrayList.new)
end
|
Class Method Details
.get_indexer(state = nil, name = "PUI Indexer") ⇒ Object
37
38
39
|
# File 'indexer/app/lib/pui_indexer.rb', line 37
# Factory for a PUI indexer.
#
# The constructor signature is (backend = nil, state = nil, name). With only
# two positional arguments Ruby binds the first one to `backend`, so the
# state has to be passed in the second slot explicitly; otherwise a supplied
# state object would be silently treated as the backend.
#
# @param state optional index-state object handed to the constructor
# @param name [String] indexer name
# @return the newly constructed indexer
def self.get_indexer(state = nil, name = "PUI Indexer")
  new(nil, state, name)
end
|
Instance Method Details
#add_infscroll_docs(resource_uris, batch) ⇒ Object
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
# File 'indexer/app/lib/pui_indexer.rb', line 122
# Appends one "ordered records" document per resource URI to the batch.
#
# For each URI the `<uri>/ordered_records` endpoint is fetched and its JSON
# is stored, serialized, in a document whose id and uri are that endpoint
# path and whose pui_parent_id is the resource URI.
#
# @param resource_uris collection of resource URI strings
# @param batch collection the generated documents are appended to with <<
# @return resource_uris (the receiver of the iteration)
def add_infscroll_docs(resource_uris, batch)
  resource_uris.each do |resource_uri|
    ordered_uri = resource_uri + '/ordered_records'
    ordered_records = JSONModel::HTTP.get_json(ordered_uri)

    infscroll_doc = {
      'id' => ordered_uri,
      'uri' => ordered_uri,
      'pui_parent_id' => resource_uri,
      'publish' => 'true',
      'primary_type' => 'resource_ordered_records',
      'types' => ['pui'],
      'json' => ASUtils.to_json(ordered_records)
    }

    batch << infscroll_doc
  end
end
|
#add_notes(doc, record) ⇒ Object
116
117
118
119
120
|
# File 'indexer/app/lib/pui_indexer.rb', line 116
# Populates doc['notes_published'] from the record's notes.
#
# Does nothing when the record carries no 'notes' value. The string
# extraction is delegated to IndexerCommon.extract_string_values with the
# :published_only flag.
#
# @param doc [Hash] Solr document being built; mutated in place
# @param record [Hash] wrapper whose 'record' entry holds the record JSON
# @return the extracted value, or nil when there are no notes
def add_notes(doc, record)
  notes = record['record']['notes']
  return unless notes

  doc['notes_published'] = IndexerCommon.extract_string_values(notes, :published_only)
end
|
#build_fullrecord(doc, record) ⇒ Object
112
113
114
|
# File 'indexer/app/lib/pui_indexer.rb', line 112
# Populates doc['fullrecord_published'] from the whole record.
#
# The string extraction is delegated to IndexerCommon.extract_string_values
# with the :published_only flag.
#
# @param doc [Hash] Solr document being built; mutated in place
# @param record [Hash] wrapper whose 'record' entry holds the record JSON
# @return the value stored in doc['fullrecord_published']
def build_fullrecord(doc, record)
  doc['fullrecord_published'] = IndexerCommon.extract_string_values(record['record'], :published_only)
end
|
#configure_doc_rules ⇒ Object
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
# File 'indexer/app/lib/pui_indexer.rb', line 51
# Registers the PUI-specific document rules on top of the inherited ones.
#
# After calling super it:
# * marks the six tree record types as having children;
# * adds a prepare hook that turns documents of inheritance-enabled types
#   into PUI documents (id suffixed with "#pui", original id kept in
#   pui_parent_id, PUI type tags appended);
# * adds a prepare hook that rebuilds doc['json'] from the merged record,
#   copies the record title, strips ancestors and container-internal fields
#   from the record, and then builds the published full-record text.
def configure_doc_rules
  super

  %w[resource archival_object digital_object digital_object_component
     classification classification_term].each do |tree_type|
    record_has_children(tree_type)
  end

  add_document_prepare_hook do |doc, _record|
    next unless RecordInheritance.has_type?(doc['primary_type'])

    original_id = doc['id']
    doc['id'] = "#{original_id}#pui"
    doc['pui_parent_id'] = original_id

    pui_types = (doc['types'] ||= [])
    pui_types << 'pui' << "pui_#{doc['primary_type']}" << 'pui_record' << 'pui_only'
  end

  add_document_prepare_hook do |doc, record|
    next unless RecordInheritance.has_type?(doc['primary_type'])

    # The JSON is serialized from a fresh merge before the record itself is
    # trimmed below.
    merged = RecordInheritance.merge(record['record'], :remove_ancestors => true)
    doc['json'] = ASUtils.to_json(merged)

    record_title = record['record']['title']
    doc['title'] = record_title if record_title

    record['record'].delete('ancestors')

    # Drop the internal note of each resolved top container and the notes of
    # its resolved container profile.
    (record['record']['instances'] || []).each do |instance|
      sub_container = instance['sub_container']
      next unless sub_container && sub_container['top_container']

      resolved_container = sub_container['top_container']['_resolved']
      next unless resolved_container

      resolved_container.delete('internal_note')

      profile = resolved_container['container_profile']
      resolved_profile = profile && profile['_resolved']
      resolved_profile.delete('notes') if resolved_profile
    end

    build_fullrecord(doc, record)
  end
end
|
#fetch_records(type, ids, resolve) ⇒ Object
28
29
30
31
32
33
34
35
|
# File 'indexer/app/lib/pui_indexer.rb', line 28
# Fetches the records of one type by id and, for inheritance-enabled types,
# returns them merged with the :direct_only option.
#
# @param type record type, used both for the model lookup and the
#   inheritance check
# @param ids list of record ids; sent as a comma-joined id_set
# @param resolve value sent as the 'resolve[]' request parameter
# @return the fetched records, or the result of RecordInheritance.merge
def fetch_records(type, ids, resolve)
  fetched = JSONModel(type).all(:id_set => ids.join(","), 'resolve[]' => resolve)
  return fetched unless RecordInheritance.has_type?(type)

  RecordInheritance.merge(fetched, :direct_only => true)
end
|
#index_round_complete(repository) ⇒ Object
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
|
# File 'indexer/app/lib/pui_indexer.rb', line 155
# Post-round work for one repository: indexes the extra PUI tree documents,
# applies deletes, and advances the stored modification times.
#
# For each (root type, node type) pair it collects the ids of roots modified
# since the last recorded mtime (minus @window_seconds, floored at 0) plus
# the roots referenced by modified nodes. The resulting root URIs feed
# add_infscroll_docs (resources only) and LargeTreeDocIndexer. The stored
# mtimes are only advanced when the batch was non-empty.
#
# @param repository object responding to #id and #repo_code
# @return the list of [repository, type, start time] checkpoints
def index_round_complete(repository)
  round_started = Time.now
  checkpoints = []
  tree_uris = []
  mtimes_dirty = false

  [
    [:resource, :archival_object],
    [:digital_object, :digital_object_component],
    [:classification, :classification_term]
  ].each do |root_type, node_type|
    checkpoints << [repository, root_type, round_started]
    checkpoints << [repository, node_type, round_started]

    root_since = [@state.get_last_mtime(repository.id, root_type) - @window_seconds, 0].max
    node_since = [@state.get_last_mtime(repository.id, node_type) - @window_seconds, 0].max

    # Roots that changed themselves ...
    root_ids = Set.new(JSONModel::HTTP.get_json(JSONModel(root_type).uri_for, :all_ids => true, :modified_since => root_since))

    # ... plus the roots of every node that changed.
    changed_node_ids = JSONModel::HTTP.get_json(JSONModel(node_type).uri_for, :all_ids => true, :modified_since => node_since)
    changed_node_ids.each_slice(@records_per_thread) do |id_slice|
      JSONModel(node_type).all(:id_set => id_slice.join(","), 'resolve[]' => []).each do |node|
        root_ref = node[root_type.to_s]['ref']
        root_ids << JSONModel.parse_reference(root_ref).fetch(:id)
      end
    end

    tree_uris.concat(root_ids.map { |root_id| JSONModel(root_type).uri_for(root_id) })
  end

  batch = IndexBatch.new

  resource_uris = tree_uris.select { |uri| JSONModel.parse_reference(uri).fetch(:type) == 'resource' }
  add_infscroll_docs(resource_uris, batch)

  tree_indexer = LargeTreeDocIndexer.new(batch)
  tree_indexer.add_largetree_docs(tree_uris)

  if batch.length > 0
    log "Indexed #{batch.length} additional PUI records in repository #{repository.repo_code}"
    index_batch(batch, nil, :parent_id_field => 'pui_parent_id')
    send_commit
    mtimes_dirty = true
  end

  if tree_indexer.deletes.length > 0
    tree_indexer.deletes.each_slice(100) do |stale_ids|
      delete_records(stale_ids, :parent_id_field => 'pui_parent_id')
    end
  end

  handle_deletes(:parent_id_field => 'pui_parent_id')

  # Remove documents staged as unpublished during the round, then reset the
  # staging list.
  delete_records(@unpublished_records, :parent_id_field => 'pui_parent_id')
  @unpublished_records.clear

  checkpoints.each do |checkpoint_repo, record_type, started_at|
    next unless mtimes_dirty

    @state.set_last_mtime(checkpoint_repo.id, record_type, started_at)
  end
end
|
#record_types ⇒ Object
45
46
47
48
49
|
# File 'indexer/app/lib/pui_indexer.rb', line 45
# Record types handled by this indexer: the inherited types for which
# RecordInheritance.has_type? is true, plus :archival_object, without
# duplicates.
#
# @return [Array] de-duplicated list of record types
def record_types
  inheriting_types = super.select { |type| RecordInheritance.has_type?(type) }
  inheriting_types | [:archival_object]
end
|
#repositories_updated_action(updated_repositories) ⇒ Object
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
|
# File 'indexer/app/lib/pui_indexer.rb', line 229
# For every updated repository that is not published, posts a Solr
# delete-by-query removing that repository's documents tagged pui_only,
# and logs the outcome.
#
# @param updated_repositories collection of hashes with 'uri' and a
#   'record' hash containing 'publish' and 'repo_code'
# @return updated_repositories (the receiver of the iteration)
def repositories_updated_action(updated_repositories)
  updated_repositories.each do |repository|
    next if repository['record']['publish']

    request = Net::HTTP::Post.new("#{solr_url.path}/update")
    request['Content-Type'] = 'application/json'

    purge_query = "repository:\"#{repository['uri']}\" AND types:pui_only"
    request.body = { :delete => { 'query' => purge_query } }.to_json

    response = do_http_request(solr_url, request)
    repo_code = repository['record']['repo_code']

    if response.code == '200'
      Log.info "Deleted PUI-only documents in private repository #{repo_code}: #{response}"
    else
      Log.error "SolrIndexerError when deleting PUI-only records in private repository #{repo_code}: #{response.body}"
    end
  end
end
|
#resolved_attributes ⇒ Object
41
42
43
|
# File 'indexer/app/lib/pui_indexer.rb', line 41
# Attributes to resolve when fetching records: the inherited list followed
# by the entries of PUI_RESOLVES.
#
# @return the combined list
def resolved_attributes
  inherited_attributes = super
  inherited_attributes + PUI_RESOLVES
end
|
#skip_index_doc?(doc) ⇒ Boolean
147
148
149
150
151
152
153
|
# File 'indexer/app/lib/pui_indexer.rb', line 147
# Decides whether a document should be left out of the index.
#
# A document whose 'publish' value is falsy is skipped, and its id is handed
# to stage_unpublished_for_deletion first.
#
# @param doc [Hash] document with 'publish' and 'id' entries
# @return [Boolean] true when the document must be skipped
def skip_index_doc?(doc)
  return false if doc['publish']

  stage_unpublished_for_deletion(doc['id'])
  true
end
|
#skip_index_record?(record) ⇒ Boolean
138
139
140
141
142
143
144
|
# File 'indexer/app/lib/pui_indexer.rb', line 138
# Decides whether a record should be left out of the index.
#
# A record whose 'publish' value is falsy is skipped, and the id
# "<record uri>#pui" is handed to stage_unpublished_for_deletion first.
#
# @param record [Hash] wrapper whose 'record' entry holds 'publish' and 'uri'
# @return [Boolean] true when the record must be skipped
def skip_index_record?(record)
  return false if record['record']['publish']

  stage_unpublished_for_deletion("#{record['record']['uri']}#pui")
  true
end
|
#stage_unpublished_for_deletion(doc_id) ⇒ Object
225
226
227
|
# File 'indexer/app/lib/pui_indexer.rb', line 225
# Remembers a document id for later deletion, but only when the id matches
# /#pui$/; any other id is ignored.
#
# @param doc_id id of the unpublished document
# @return the result of @unpublished_records.add, or nil when ignored
def stage_unpublished_for_deletion(doc_id)
  return unless doc_id =~ /#pui$/

  @unpublished_records.add(doc_id)
end
|