Skip to content

Commit 2ba0b68

Browse files
d-w-moore authored and trel committed
[#456] auto-close data objects that go out of scope
Previously, open data-object write handles (those with modes including 'w' or 'a') could go out of scope at the wrong time relative to the session object which managed their connection. This could result in pending write updates to the data object being lost, and/or the replica ending up stale. Now, we can opt-in to use "managed" write handles to ensure f.close() will ultimately be called for any write handle f persisting to the end of the Python interpreter's lifetime. Those that exit scope prior to exit time are also guaranteed (as much as Python allows) the same managed clean-up via their "__del__" method. remove unneeded quotes; unset script executable status
1 parent 9c27dd6 commit 2ba0b68

7 files changed

Lines changed: 196 additions & 10 deletions

File tree

README.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,22 @@ PRC provides `file-like objects <http://docs.python.org/2/library/stdtypes.html#
298298
foo
299299
bar
300300

301+
As of v1.1.9, there is also an auto-close configuration setting for data objects, set to :code:`False` by default,
302+
which may be assigned the value :code:`True` for guaranteed auto-closing of open data object handles at the proper
303+
time.
304+
305+
In a small but illustrative example, the following Python session does not require an explicit call to f.close():
306+
307+
>>> import irods.client_configuration as config, irods.test.helpers as helpers
308+
>>> config.data_objects.auto_close = True
309+
>>> session = helpers.make_session()
310+
>>> f = session.data_objects.open('/{0.zone}/home/{0.username}/new_object.txt'.format(session),'w')
311+
>>> f.write(b'new content.')
312+
313+
This may be useful for Python programs in which frequent flushing of write updates to data objects is undesirable --
314+
with descriptors on such objects possibly being held open for indeterminately long lifetimes -- yet the eventual
315+
application of those updates prior to the teardown of the Python interpreter is required.
316+
301317

302318
Computing and Retrieving Checksums
303319
----------------------------------
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from __future__ import print_function
2+
import ast
3+
import copy
4+
import io
5+
import logging
6+
import re
7+
import sys
8+
import types
9+
10+
# Obtain the module logger from the logging registry so that it takes part in
# the logger hierarchy (levels and handlers configured by the application then
# apply to it).  Instantiating logging.Logger directly yields a detached logger
# with no parent, which is not the object returned by logging.getLogger().
logger = logging.getLogger(__name__)
11+
12+
class iRODSConfiguration(object):
    """Common base class for client configuration categories.

    The empty __slots__ keeps instances of this base free of a per-instance
    __dict__, so a subclass that declares its own __slots__ only accepts the
    attribute names it lists.
    """

    __slots__ = ()
14+
15+
def getter(category, setting):
    """Build a zero-argument callable that reads a configuration setting lazily.

    `category` names an object among this module's globals and `setting` names
    an attribute on that object.  Both lookups are deferred until the returned
    callable is invoked, so the result reflects the value in effect at call time.
    """
    def _read_current_value():
        category_object = globals()[category]
        return getattr(category_object, setting)

    return _read_current_value
17+
18+
# #############################################################################
19+
#
20+
# Classes for building client configuration categories
21+
# (irods.client_configuration.data_objects is one such category):
22+
23+
class DataObjects(iRODSConfiguration):
    """Configuration category holding settings that influence data objects."""

    # Valid configurable settings are limited to the names listed here.
    __slots__ = ('auto_close',)

    def __init__(self):

        # Setting it in the constructor lets the attribute be a
        # configurable one and allows a default value of False.
        #
        # Running the following code will opt in to the auto-closing
        # behavior for any data objects opened subsequently.
        #
        # >>> import irods.client_configuration as config
        # >>> config.data_objects.auto_close = True

        self.auto_close = False
38+
39+
# #############################################################################
40+
#
41+
# Instantiations of client-configuration categories:
42+
43+
# The usage "irods.client_configuration.data_objects" reflects the commonly used
44+
# manager name (session.data_objects) and is thus understood to influence the
45+
# behavior of data objects.
46+
#
47+
# By design, valid configurable targets (e.g. auto_close) are limited to the names
48+
# listed in the __slots__ member of the category class.
49+
50+
# Module-level instance of the DataObjects category, reached by client code as
# irods.client_configuration.data_objects (e.g. to assign its auto_close setting).
data_objects = DataObjects()

irods/manager/data_object_manager.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from irods.collection import iRODSCollection
1313
from irods.data_object import (
1414
iRODSDataObject, iRODSDataObjectFileRaw, chunks, irods_dirname, irods_basename)
15+
import irods.client_configuration as client_config
1516
import irods.keywords as kw
1617
import irods.parallel as parallel
1718
from irods.parallel import deferred_call
@@ -20,6 +21,33 @@
2021
import json
2122
import logging
2223

24+
25+
26+
def call___del__if_exists(super_):
    """
    Invoke the next __del__ finalizer reachable through the given super() proxy.

    super_ is expected to be a bound super object; if no class further along the
    MRO (method resolution order) defines __del__, nothing happens.
    """
    finalizer = getattr(super_, '__del__', None)
    if not finalizer:
        return
    finalizer()
34+
35+
class ManagedBufferedRandom(io.BufferedRandom):
    """
    A buffered read/write handle that closes itself when it is finalized.

    Each instance keeps a reference to its session (when one is passed via the
    '_session' keyword) and records itself in the irods.session._fds registry.
    """

    def __init__(self,*a,**kwd):
        # Help ensure proper teardown sequence by storing a reference to the session,
        # if provided via keyword '_session'.
        self._iRODS_session = kwd.pop('_session',None)
        super(ManagedBufferedRandom,self).__init__(*a,**kwd)
        # NOTE(review): imported at call time rather than at module level,
        # presumably to avoid a circular import -- confirm.
        import irods.session
        with irods.session._fds_lock:
            # The registry starts out as None in irods.session; indexing None
            # would raise TypeError, so only register once it exists.  An
            # unregistered handle still closes itself via __del__.
            if irods.session._fds is not None:
                irods.session._fds[self] = None

    def __del__(self):
        try:
            if not self.closed:
                self.close()
        finally:
            # Give the next finalizer in the MRO its turn even if close() raised.
            call___del__if_exists(super(ManagedBufferedRandom,self))
50+
2351
MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE = 32 * ( 1024 ** 2)
2452

2553
DEFAULT_NUMBER_OF_THREADS = 0 # Defaults for reasonable number of threads -- optimized to be
@@ -298,9 +326,14 @@ def open_with_FileRaw(self, *arg, **kw_options):
298326
kw.RESC_HIER_STR_KW
299327
))
300328

301-
302-
def open(self, path, mode, create = True, finalize_on_close = True, returned_values = None, allow_redirect = True, **options):
303-
329+
def open(self, path, mode,
330+
create = True, # (Dis-)allow object creation.
331+
finalize_on_close = True, # For PRC internal use.
332+
auto_close = client_config.getter('data_objects','auto_close'), # The default value will be a lambda returning the
333+
# global setting. Use True or False as an override.
334+
returned_values = None, # Used to update session reference, for forging more conns to same host, in irods.parallel.io_main
335+
allow_redirect = True, # This may be set to False to disallow the client redirect-to-resource.
336+
**options):
304337
_raw_fd_holder = options.get('_raw_fd_holder',[])
305338
# If no keywords are used that would influence the server as to the choice of a storage resource,
306339
# then use the default resource in the client configuration.
@@ -395,8 +428,16 @@ def make_FileOpenRequest(**extra_opts):
395428
raw.session = directed_sess
396429

397430
(_raw_fd_holder).append(raw)
398-
return io.BufferedRandom(raw)
399431

432+
if callable(auto_close):
433+
# Use case: auto_close has defaulted to the irods.client_configuration getter.
434+
# Access the corresponding entry in irods.client_configuration.
435+
auto_close = auto_close()
436+
437+
if auto_close:
438+
return ManagedBufferedRandom(raw, _session = self.sess)
439+
440+
return io.BufferedRandom(raw)
400441

401442
def trim(self, path, **options):
402443

irods/session.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from __future__ import absolute_import
2+
import ast
23
import atexit
34
import copy
4-
import os
5-
import ast
6-
import json
75
import errno
6+
import json
87
import logging
8+
import os
9+
import threading
10+
import weakref
911
from irods.query import Query
1012
from irods.pool import Pool
1113
from irods.account import iRODSAccount
@@ -19,25 +21,32 @@
1921
from irods.exception import NetworkException
2022
from irods.password_obfuscation import decode
2123
from irods import NATIVE_AUTH_SCHEME, PAM_AUTH_SCHEME
22-
import threading
23-
import weakref
2424
from . import DEFAULT_CONNECTION_TIMEOUT
2525

26+
# Registry of session objects awaiting exit-time cleanup, with its guard lock.
# The registry itself stays None until it is created by _weakly_reference().
_sessions = None
_sessions_lock = threading.Lock()

# Registry of open managed data object handles, with its guard lock.  Like
# _sessions, it is None until _weakly_reference() creates it.
_fds = None
_fds_lock = threading.Lock()
2830

31+
2932
def _cleanup_remaining_sessions():
    """
    Exit-time hook: close every registered data object handle that is still
    open, then clean up every remaining session.

    Handles are closed before sessions are cleaned up.  A failure to close one
    handle is logged and does not prevent the remaining handles and sessions
    from being processed.
    """
    # Take the snapshot under the same lock used when handles register
    # themselves, so the registry cannot change size while it is being listed.
    with _fds_lock:
        open_handles = list(_fds.keys())
    for fd in open_handles:
        try:
            if not fd.closed:
                fd.close()
        except Exception:
            logging.getLogger(__name__).exception(
                'Failed to close a data object handle during exit-time cleanup.')
        finally:
            # remove refs to session objects no longer needed
            fd._iRODS_session = None
    for ses in _sessions.copy():
        ses.cleanup() # internally modifies _sessions
3240

3341
def _weakly_reference(ses):
34-
global _sessions
42+
global _sessions, _fds
3543
try:
3644
if _sessions is None:
3745
with _sessions_lock:
3846
do_register = (_sessions is None)
3947
if do_register:
4048
_sessions = weakref.WeakKeyDictionary()
49+
_fds = weakref.WeakKeyDictionary()
4150
atexit.register(_cleanup_remaining_sessions)
4251
finally:
4352
_sessions[ses] = None

irods/test/data_obj_test.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import stat
1616
import string
1717
import sys
18+
import subprocess
1819
import time
1920
import unittest
2021
import xml.etree.ElementTree
@@ -38,6 +39,7 @@ def is_localhost_synonym(name):
3839
from irods.column import Criterion
3940
from irods.data_object import chunks, irods_dirname
4041
import irods.test.helpers as helpers
42+
import irods.test.modules as test_modules
4143
import irods.keywords as kw
4244
from irods.manager import data_object_manager
4345
from irods.message import RErrorStack
@@ -1888,6 +1890,28 @@ def test_set_and_access_data_comments__issue_450(self):
18881890
finally:
18891891
d.unlink(force = True)
18901892

1893+
def _auto_close_test(self, data_object_path, content):
    """
    Check the state of the data object at data_object_path, then remove it.

    Asserts that the first replica's status is 1 (presumably the "good",
    i.e. non-stale, replica status -- confirm) and that the stored bytes
    decode to `content`.  The object is unlinked whether or not the
    assertions pass.
    """
    d = None
    try:
        d = self.sess.data_objects.get(data_object_path)
        self.assertEqual(int(d.replicas[0].status), 1)
        # Close the read handle deterministically rather than leaving it
        # open until garbage collection (and across the unlink below).
        with d.open('r') as f:
            self.assertEqual(f.read().decode(), content)
    finally:
        if d: d.unlink(force = True)
1901+
1902+
def test_data_objects_auto_close_on_process_exit__issue_456(self):
    """
    Run the issue-456 helper module as a script in a child process, then
    check the data object whose path and expected content it printed.
    """
    script_path = os.path.join(test_modules.__path__[0], 'test_auto_close_of_data_objects__issue_456.py')
    # The child runs under the same Python interpreter binary as this test.
    child = subprocess.Popen([sys.executable, script_path], stdout=subprocess.PIPE)
    child_stdout = child.communicate()[0]
    # The script prints two whitespace-separated fields: path, then content.
    fields = child_stdout.decode().split()
    data_object_path, expected_content = fields
    self._auto_close_test(data_object_path, expected_content)
1908+
1909+
def test_data_objects_auto_close_on_function_exit__issue_456(self):
    """
    Call the issue-456 helper's test() in this process, asking only for the
    object path and expected content, then check the resulting data object.
    """
    import irods.test.modules.test_auto_close_of_data_objects__issue_456 as test_module
    # Requesting specific names keeps the helper's session and file handle
    # from being returned to this frame.
    wanted = ('name','expected_content')
    exported = test_module.test(return_locals = wanted)
    data_object_path, expected_content = exported
    self._auto_close_test(data_object_path, expected_content)
1913+
1914+
18911915
if __name__ == '__main__':
18921916
# let the tests find the parent irods lib
18931917
sys.path.insert(0, os.path.abspath('../..'))

irods/test/modules/__init__.py

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# This helper module can double as a Python script, allowing us to run the below
2+
# test() method either within the current process or in a child process. The
3+
# method in question can thus be called by the following unit tests so that we may assert
4+
# proper data object auto-closing functionality under these respective scenarios:
5+
#
6+
# irods.test.data_obj_test.TestDataObjOps.test_data_objects_auto_close_on_function_exit__issue_456
7+
# irods.test.data_obj_test.TestDataObjOps.test_data_objects_auto_close_on_process_exit__issue_456
8+
9+
from __future__ import print_function
10+
import contextlib
11+
try:
12+
import irods.client_configuration as config
13+
except ImportError:
14+
pass
15+
from datetime import datetime
16+
import os
17+
from irods.test import helpers
18+
19+
@contextlib.contextmanager
def auto_close_data_objects(value):
    """
    Temporarily set config.data_objects.auto_close to `value`.

    The previous setting is restored on exit from the with-block, including
    when the block raises.  If the 'config' module is not among this module's
    globals (its import failed), the setting is left untouched.
    """
    if 'config' in globals():
        saved_setting = config.data_objects.auto_close
        try:
            config.data_objects.auto_close = value
            yield
        finally:
            config.data_objects.auto_close = saved_setting
    else:
        yield
30+
31+
def test(return_locals = True):
    """
    Open a new data object for writing with auto-close enabled, write to it,
    and deliberately leave it unclosed.

    If return_locals is a list or tuple of local variable names, return a list
    of those locals' values, in that order.  Otherwise return the whole
    locals() mapping, which includes the session (ses) and the open handle (f).
    """
    with auto_close_data_objects(True):
        expected_content = 'content'
        ses = helpers.make_session()
        name = '/{0.zone}/home/{0.username}/{1}-object.dat'.format(ses, helpers.unique_name(os.getpid(), datetime.now()))
        f = ses.data_objects.open(name,'w')
        f.write(expected_content.encode('utf8'))
        # The locals above are part of the return value, so their names matter
        # to callers (e.g. 'name' and 'expected_content').
        L = locals()
        if isinstance(return_locals, (tuple, list)):
            return [L[k] for k in return_locals]
        return L
43+
44+
# Script mode: run test() with its default argument so the returned mapping
# (bound at module level) keeps the session and open handle referenced, then
# print the object path and expected content for the parent process to parse.
if __name__ == '__main__':
    test_output = test()
    report_line = "{name} {expected_content}".format(**test_output)
    print(report_line)

0 commit comments

Comments
 (0)