Programmatic Access#

Important

Before using any programmatic access to the data, you first need to set up your CAVEclient token.

CAVEclient#

Most programmatic access to the CAVE services occurs through CAVEclient, a Python client to access various types of data from the online services.

Full documentation for CAVEclient is available here.

To initialize a caveclient, we give it a datastack, which is a name that defines a particular combination of imagery, segmentation, and annotation database. For the MICrONs public data, we use the datastack name minnie65_public.

import os
from caveclient import CAVEclient
datastack_name = 'minnie65_public'
client = CAVEclient(datastack_name)

# set version, for consistency across time
client.materialize.version = 1078 # Current as of Summer 2024

# Show the description of the datastack
client.info.get_datastack_info()['description']
---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
Cell In[2], line 7
      4 client = CAVEclient(datastack_name)
      6 # set version, for consistency across time
----> 7 client.materialize.version = 1078 # Current as of Summer 2024
      9 # Show the description of the datastack
     10 client.info.get_datastack_info()['description']

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/frameworkclient.py:449, in CAVEclientFull.materialize(self)
    444 """
    445 A client for the materialization service. See [client.materialize](../client_api/materialize.md)
    446 for more information.
    447 """
    448 if self._materialize is None:
--> 449     self._materialize = MaterializationClient(
    450         server_address=self.local_server,
    451         auth_client=self.auth,
    452         datastack_name=self._datastack_name,
    453         synapse_table=self.info.get_datastack_info().get("synapse_table", None),
    454         max_retries=self._max_retries,
    455         pool_maxsize=self._pool_maxsize,
    456         pool_block=self._pool_block,
    457         over_client=self,
    458         desired_resolution=self.desired_resolution,
    459     )
    460 return self._materialize

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:201, in MaterializationClient(server_address, datastack_name, auth_client, cg_client, synapse_table, api_version, version, verify, max_retries, pool_maxsize, pool_block, desired_resolution, over_client)
    189 endpoints, api_version = _api_endpoints(
    190     api_version,
    191     SERVER_KEY,
   (...)
    197     verify=verify,
    198 )
    200 MatClient = client_mapping[api_version]
--> 201 return MatClient(
    202     server_address,
    203     auth_header,
    204     api_version,
    205     endpoints,
    206     SERVER_KEY,
    207     datastack_name,
    208     cg_client=cg_client,
    209     synapse_table=synapse_table,
    210     version=version,
    211     verify=verify,
    212     max_retries=max_retries,
    213     pool_maxsize=pool_maxsize,
    214     pool_block=pool_block,
    215     over_client=over_client,
    216     desired_resolution=desired_resolution,
    217 )

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:1924, in MaterializationClientV3.__init__(self, *args, **kwargs)
   1922 if self.fc is not None:
   1923     if metadata[0].result() is not None and metadata[1].result() is not None:
-> 1924         tables = TableManager(
   1925             self.fc, metadata[0].result(), metadata[1].result()
   1926         )
   1927 self._tables = tables
   1928 if self.fc is not None:

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:669, in TableManager.__init__(self, client, metadata, schema)
    667 populate_table_cache(client, metadata=self._table_metadata)
    668 for tn in self._tables:
--> 669     setattr(self, tn, make_query_filter(tn, self._table_metadata[tn], client))

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:623, in make_query_filter(table_name, meta, client)
    614 def make_query_filter(table_name, meta, client):
    615     (
    616         pts,
    617         val_cols,
    618         all_unbd_pts,
    619         table_map,
    620         rename_map,
    621         table_list,
    622         desc,
--> 623     ) = get_table_info(table_name, meta, client)
    624     class_vals = make_class_vals(
    625         pts, val_cols, all_unbd_pts, table_map, rename_map, table_list
    626     )
    627     QueryFilter = attrs.make_class(
    628         table_name, class_vals, bases=(make_kwargs_mixin(client),)
    629     )

File /opt/envs/allensdk/lib/python3.10/site-packages/cachetools/__init__.py:741, in cached.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    739 except KeyError:
    740     pass  # key not found
--> 741 v = func(*args, **kwargs)
    742 try:
    743     cache[k] = v

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:267, in get_table_info(tn, meta, client, allow_types, merge_schema, suffixes)
    265     name_ref = None
    266 else:
--> 267     schema = table_metadata(ref_table, client).get("schema")
    268     ref_pts, ref_cols, ref_unbd_pts = get_col_info(
    269         meta["schema"], client, allow_types=allow_types, omit_fields=["target_id"]
    270     )
    271     name_base = ref_table

File /opt/envs/allensdk/lib/python3.10/site-packages/cachetools/__init__.py:741, in cached.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    739 except KeyError:
    740     pass  # key not found
--> 741 v = func(*args, **kwargs)
    742 try:
    743     cache[k] = v

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:314, in table_metadata(table_name, client, meta)
    312     warnings.simplefilter(action="ignore")
    313     if meta is None:
--> 314         meta = client.materialize.get_table_metadata(table_name)
    315 if "schema" not in meta:
    316     meta["schema"] = meta.get("schema_type")

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/frameworkclient.py:449, in CAVEclientFull.materialize(self)
    444 """
    445 A client for the materialization service. See [client.materialize](../client_api/materialize.md)
    446 for more information.
    447 """
    448 if self._materialize is None:
--> 449     self._materialize = MaterializationClient(
    450         server_address=self.local_server,
    451         auth_client=self.auth,
    452         datastack_name=self._datastack_name,
    453         synapse_table=self.info.get_datastack_info().get("synapse_table", None),
    454         max_retries=self._max_retries,
    455         pool_maxsize=self._pool_maxsize,
    456         pool_block=self._pool_block,
    457         over_client=self,
    458         desired_resolution=self.desired_resolution,
    459     )
    460 return self._materialize

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:201, in MaterializationClient(server_address, datastack_name, auth_client, cg_client, synapse_table, api_version, version, verify, max_retries, pool_maxsize, pool_block, desired_resolution, over_client)
    189 endpoints, api_version = _api_endpoints(
    190     api_version,
    191     SERVER_KEY,
   (...)
    197     verify=verify,
    198 )
    200 MatClient = client_mapping[api_version]
--> 201 return MatClient(
    202     server_address,
    203     auth_header,
    204     api_version,
    205     endpoints,
    206     SERVER_KEY,
    207     datastack_name,
    208     cg_client=cg_client,
    209     synapse_table=synapse_table,
    210     version=version,
    211     verify=verify,
    212     max_retries=max_retries,
    213     pool_maxsize=pool_maxsize,
    214     pool_block=pool_block,
    215     over_client=over_client,
    216     desired_resolution=desired_resolution,
    217 )

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:1924, in MaterializationClientV3.__init__(self, *args, **kwargs)
   1922 if self.fc is not None:
   1923     if metadata[0].result() is not None and metadata[1].result() is not None:
-> 1924         tables = TableManager(
   1925             self.fc, metadata[0].result(), metadata[1].result()
   1926         )
   1927 self._tables = tables
   1928 if self.fc is not None:

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:669, in TableManager.__init__(self, client, metadata, schema)
    667 populate_table_cache(client, metadata=self._table_metadata)
    668 for tn in self._tables:
--> 669     setattr(self, tn, make_query_filter(tn, self._table_metadata[tn], client))

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:623, in make_query_filter(table_name, meta, client)
    614 def make_query_filter(table_name, meta, client):
    615     (
    616         pts,
    617         val_cols,
    618         all_unbd_pts,
    619         table_map,
    620         rename_map,
    621         table_list,
    622         desc,
--> 623     ) = get_table_info(table_name, meta, client)
    624     class_vals = make_class_vals(
    625         pts, val_cols, all_unbd_pts, table_map, rename_map, table_list
    626     )
    627     QueryFilter = attrs.make_class(
    628         table_name, class_vals, bases=(make_kwargs_mixin(client),)
    629     )

    [... skipping similar frames: cached.<locals>.decorator.<locals>.wrapper at line 741 (1 times)]

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:267, in get_table_info(tn, meta, client, allow_types, merge_schema, suffixes)
    265     name_ref = None
    266 else:
--> 267     schema = table_metadata(ref_table, client).get("schema")
    268     ref_pts, ref_cols, ref_unbd_pts = get_col_info(
    269         meta["schema"], client, allow_types=allow_types, omit_fields=["target_id"]
    270     )
    271     name_base = ref_table

    [... skipping similar frames: cached.<locals>.decorator.<locals>.wrapper at line 741 (1 times)]

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:314, in table_metadata(table_name, client, meta)
    312     warnings.simplefilter(action="ignore")
    313     if meta is None:
--> 314         meta = client.materialize.get_table_metadata(table_name)
    315 if "schema" not in meta:
    316     meta["schema"] = meta.get("schema_type")

    [... skipping similar frames: cached.<locals>.decorator.<locals>.wrapper at line 741 (530 times), MaterializationClient at line 201 (265 times), MaterializationClientV3.__init__ at line 1924 (265 times), TableManager.__init__ at line 669 (265 times), get_table_info at line 267 (265 times), make_query_filter at line 623 (265 times), CAVEclientFull.materialize at line 449 (265 times), table_metadata at line 314 (265 times)]

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/frameworkclient.py:449, in CAVEclientFull.materialize(self)
    444 """
    445 A client for the materialization service. See [client.materialize](../client_api/materialize.md)
    446 for more information.
    447 """
    448 if self._materialize is None:
--> 449     self._materialize = MaterializationClient(
    450         server_address=self.local_server,
    451         auth_client=self.auth,
    452         datastack_name=self._datastack_name,
    453         synapse_table=self.info.get_datastack_info().get("synapse_table", None),
    454         max_retries=self._max_retries,
    455         pool_maxsize=self._pool_maxsize,
    456         pool_block=self._pool_block,
    457         over_client=self,
    458         desired_resolution=self.desired_resolution,
    459     )
    460 return self._materialize

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:201, in MaterializationClient(server_address, datastack_name, auth_client, cg_client, synapse_table, api_version, version, verify, max_retries, pool_maxsize, pool_block, desired_resolution, over_client)
    189 endpoints, api_version = _api_endpoints(
    190     api_version,
    191     SERVER_KEY,
   (...)
    197     verify=verify,
    198 )
    200 MatClient = client_mapping[api_version]
--> 201 return MatClient(
    202     server_address,
    203     auth_header,
    204     api_version,
    205     endpoints,
    206     SERVER_KEY,
    207     datastack_name,
    208     cg_client=cg_client,
    209     synapse_table=synapse_table,
    210     version=version,
    211     verify=verify,
    212     max_retries=max_retries,
    213     pool_maxsize=pool_maxsize,
    214     pool_block=pool_block,
    215     over_client=over_client,
    216     desired_resolution=desired_resolution,
    217 )

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:1924, in MaterializationClientV3.__init__(self, *args, **kwargs)
   1922 if self.fc is not None:
   1923     if metadata[0].result() is not None and metadata[1].result() is not None:
-> 1924         tables = TableManager(
   1925             self.fc, metadata[0].result(), metadata[1].result()
   1926         )
   1927 self._tables = tables
   1928 if self.fc is not None:

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:669, in TableManager.__init__(self, client, metadata, schema)
    667 populate_table_cache(client, metadata=self._table_metadata)
    668 for tn in self._tables:
--> 669     setattr(self, tn, make_query_filter(tn, self._table_metadata[tn], client))

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:623, in make_query_filter(table_name, meta, client)
    614 def make_query_filter(table_name, meta, client):
    615     (
    616         pts,
    617         val_cols,
    618         all_unbd_pts,
    619         table_map,
    620         rename_map,
    621         table_list,
    622         desc,
--> 623     ) = get_table_info(table_name, meta, client)
    624     class_vals = make_class_vals(
    625         pts, val_cols, all_unbd_pts, table_map, rename_map, table_list
    626     )
    627     QueryFilter = attrs.make_class(
    628         table_name, class_vals, bases=(make_kwargs_mixin(client),)
    629     )

    [... skipping similar frames: cached.<locals>.decorator.<locals>.wrapper at line 741 (1 times)]

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:267, in get_table_info(tn, meta, client, allow_types, merge_schema, suffixes)
    265     name_ref = None
    266 else:
--> 267     schema = table_metadata(ref_table, client).get("schema")
    268     ref_pts, ref_cols, ref_unbd_pts = get_col_info(
    269         meta["schema"], client, allow_types=allow_types, omit_fields=["target_id"]
    270     )
    271     name_base = ref_table

    [... skipping similar frames: cached.<locals>.decorator.<locals>.wrapper at line 741 (1 times)]

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/tools/table_manager.py:314, in table_metadata(table_name, client, meta)
    312     warnings.simplefilter(action="ignore")
    313     if meta is None:
--> 314         meta = client.materialize.get_table_metadata(table_name)
    315 if "schema" not in meta:
    316     meta["schema"] = meta.get("schema_type")

File /opt/envs/allensdk/lib/python3.10/site-packages/cachetools/__init__.py:741, in cached.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    739 except KeyError:
    740     pass  # key not found
--> 741 v = func(*args, **kwargs)
    742 try:
    743     cache[k] = v

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:519, in MaterializationClientV2.get_table_metadata(self, table_name, datastack_name, version, log_warning)
    517     datastack_name = self.datastack_name
    518 if version is None:
--> 519     version = self.version
    520 endpoint_mapping = self.default_url_mapping
    521 endpoint_mapping["datastack_name"] = datastack_name

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:280, in MaterializationClientV2.version(self)
    276 """The version of the materialization. Can be used to set up the
    277 client to default to a specific version when timestamps or versions are not
    278 specified in queries. If not set, defaults to the most recent version."""
    279 if self._version is None:
--> 280     self._version = self.most_recent_version()
    281 return self._version

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:335, in MaterializationClientV2.most_recent_version(self, datastack_name)
    318 def most_recent_version(self, datastack_name=None) -> int:
    319     """
    320     Get the most recent version of materialization for this datastack name
    321 
   (...)
    332         Most recent version of materialization for this datastack name
    333     """
--> 335     versions = self.get_versions(datastack_name=datastack_name)
    336     return np.max(np.array(versions))

File /opt/envs/allensdk/lib/python3.10/site-packages/caveclient/materializationengine.py:360, in MaterializationClientV2.get_versions(self, datastack_name, expired)
    358 url = self._endpoints["versions"].format_map(endpoint_mapping)
    359 query_args = {"expired": expired}
--> 360 response = self.session.get(url, params=query_args)
    361 self.raise_for_status(response)
    362 return response.json()

File /opt/envs/allensdk/lib/python3.10/site-packages/requests/sessions.py:602, in Session.get(self, url, **kwargs)
    594 r"""Sends a GET request. Returns :class:`Response` object.
    595 
    596 :param url: URL for the new :class:`Request` object.
    597 :param \*\*kwargs: Optional arguments that ``request`` takes.
    598 :rtype: requests.Response
    599 """
    601 kwargs.setdefault("allow_redirects", True)
--> 602 return self.request("GET", url, **kwargs)

File /opt/envs/allensdk/lib/python3.10/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File /opt/envs/allensdk/lib/python3.10/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File /opt/envs/allensdk/lib/python3.10/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    664     timeout = TimeoutSauce(connect=timeout, read=timeout)
    666 try:
--> 667     resp = conn.urlopen(
    668         method=request.method,
    669         url=url,
    670         body=request.body,
    671         headers=request.headers,
    672         redirect=False,
    673         assert_same_host=False,
    674         preload_content=False,
    675         decode_content=False,
    676         retries=self.max_retries,
    677         timeout=timeout,
    678         chunked=chunked,
    679     )
    681 except (ProtocolError, OSError) as err:
    682     raise ConnectionError(err, request=request)

File /opt/envs/allensdk/lib/python3.10/site-packages/urllib3/connectionpool.py:789, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    786 response_conn = conn if not release_conn else None
    788 # Make the request on the HTTPConnection object
--> 789 response = self._make_request(
    790     conn,
    791     method,
    792     url,
    793     timeout=timeout_obj,
    794     body=body,
    795     headers=headers,
    796     chunked=chunked,
    797     retries=retries,
    798     response_conn=response_conn,
    799     preload_content=preload_content,
    800     decode_content=decode_content,
    801     **response_kw,
    802 )
    804 # Everything went great!
    805 clean_exit = True

File /opt/envs/allensdk/lib/python3.10/site-packages/urllib3/connectionpool.py:536, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
    534 # Receive the response from the server
    535 try:
--> 536     response = conn.getresponse()
    537 except (BaseSSLError, OSError) as e:
    538     self._raise_timeout(err=e, url=url, timeout_value=read_timeout)

File /opt/envs/allensdk/lib/python3.10/site-packages/urllib3/connection.py:507, in HTTPConnection.getresponse(self)
    504 from .response import HTTPResponse
    506 # Get the response from http.client.HTTPConnection
--> 507 httplib_response = super().getresponse()
    509 try:
    510     assert_header_parsing(httplib_response.msg)

File /opt/conda/lib/python3.10/http/client.py:1375, in HTTPConnection.getresponse(self)
   1373 try:
   1374     try:
-> 1375         response.begin()
   1376     except ConnectionError:
   1377         self.close()

File /opt/conda/lib/python3.10/http/client.py:337, in HTTPResponse.begin(self)
    334 else:
    335     raise UnknownProtocol(version)
--> 337 self.headers = self.msg = parse_headers(self.fp)
    339 if self.debuglevel > 0:
    340     for hdr, val in self.headers.items():

File /opt/conda/lib/python3.10/http/client.py:236, in parse_headers(fp, _class)
    234 headers = _read_headers(fp)
    235 hstring = b''.join(headers).decode('iso-8859-1')
--> 236 return email.parser.Parser(_class=_class).parsestr(hstring)

File /opt/conda/lib/python3.10/email/parser.py:67, in Parser.parsestr(self, text, headersonly)
     59 def parsestr(self, text, headersonly=False):
     60     """Create a message structure from a string.
     61 
     62     Returns the root of the message structure.  Optional headersonly is a
   (...)
     65     the file.
     66     """
---> 67     return self.parse(StringIO(text), headersonly=headersonly)

File /opt/conda/lib/python3.10/email/parser.py:56, in Parser.parse(self, fp, headersonly)
     54     if not data:
     55         break
---> 56     feedparser.feed(data)
     57 return feedparser.close()

File /opt/conda/lib/python3.10/email/feedparser.py:176, in FeedParser.feed(self, data)
    174 """Push more data into the parser."""
    175 self._input.push(data)
--> 176 self._call_parse()

File /opt/conda/lib/python3.10/email/feedparser.py:180, in FeedParser._call_parse(self)
    178 def _call_parse(self):
    179     try:
--> 180         self._parse()
    181     except StopIteration:
    182         pass

File /opt/conda/lib/python3.10/email/feedparser.py:256, in FeedParser._parsegen(self)
    254     self._cur.set_payload(EMPTYSTRING.join(lines))
    255     return
--> 256 if self._cur.get_content_type() == 'message/delivery-status':
    257     # message/delivery-status contains blocks of headers separated by
    258     # a blank line.  We'll represent each header block as a separate
    259     # nested message object, but the processing is a bit different
    260     # than standard message/* types because there is no body for the
    261     # nested messages.  A blank line separates the subparts.
    262     while True:
    263         self._input.push_eof_matcher(NLCRE.match)

File /opt/conda/lib/python3.10/email/message.py:578, in Message.get_content_type(self)
    565 """Return the message's content type.
    566 
    567 The returned string is coerced to lower case of the form
   (...)
    575 message/rfc822.
    576 """
    577 missing = object()
--> 578 value = self.get('content-type', missing)
    579 if value is missing:
    580     # This should have no parameters
    581     return self.get_default_type()

File /opt/conda/lib/python3.10/email/message.py:471, in Message.get(self, name, failobj)
    469 for k, v in self._headers:
    470     if k.lower() == name:
--> 471         return self.policy.header_fetch_parse(k, v)
    472 return failobj

File /opt/conda/lib/python3.10/email/_policybase.py:316, in Compat32.header_fetch_parse(self, name, value)
    311 def header_fetch_parse(self, name, value):
    312     """+
    313     If the value contains binary data, it is converted into a Header object
    314     using the unknown-8bit charset.  Otherwise it is returned unmodified.
    315     """
--> 316     return self._sanitize_header(name, value)

RecursionError: maximum recursion depth exceeded

CAVEclient Basics#

The most frequent use of the CAVEclient is to query the database for annotations like synapses. All database functions are under the client.materialize property. To see what tables are available, use the get_tables function:

client.materialize.get_tables()

For each table, you can see the metadata describing that table. For example, let’s look at the nucleus_detection_v0 table:

client.materialize.get_table_metadata('nucleus_detection_v0')

You get a dictionary of values. Two fields are particularly important: description, which offers a text description of the contents of the table, and voxel_resolution, which specifies the resolution of the coordinates in the table, in nm/voxel.

Querying Tables#

To get the contents of a table, use the query_table function. This will return the whole contents of a table without any filtering, up to a maximum limit of 200,000 rows. The table is returned as a Pandas DataFrame and you can immediately use standard Pandas functions on it.

cell_type_df = client.materialize.query_table('nucleus_detection_v0')
cell_type_df.head()

Important

While most tables are small enough to be returned in full, the synapse table has hundreds of millions of rows and is too large to download this way.

Tables have a collection of columns, some of which specify points in space (columns ending in _position), some a root id (ending in _root_id), and others that contain other information about the object at that point. Before describing some of the most important tables in the database, it’s useful to know about a few advanced options that apply when querying any table.

  • desired_resolution : This parameter allows you to convert the columns specifying spatial points to different resolutions. Many tables are stored at a resolution of 4x4x40 nm/voxel, for example, but you can convert to nanometers by setting desired_resolution=[1,1,1].

  • split_positions : This parameter allows you to split the columns specifying spatial points into separate columns for each dimension. The new column names will be the original column name with _x, _y, and _z appended.

  • select_columns : This parameter allows you to get only a subset of columns from the table. Once you know exactly what you want, this can save you some cleanup.

  • limit : This parameter allows you to limit the number of rows returned. If you are just testing out a query or trying to inspect the kind of data within a table, you can set this to a small number to make sure it works before downloading the whole table. Note that this will show a warning so that you don’t accidentally limit your query when you don’t mean to.

For example, using all of these together:

cell_type_df = client.materialize.query_table('nucleus_detection_v0', split_positions=True, desired_resolution=[1,1,1], select_columns=['pt_position', 'pt_root_id'], limit=10)
cell_type_df

Filtering Queries#

Filtering tables so that you only get data about certain rows back is a very common operation. While there are filtering options in the query_table function (see documentation for more details), a more unified filter interface is available through a “table manager” interface. Rather than passing a table name to the query_table function, client.materialize.tables has a subproperty for each table in the database that can be used to filter that table. The general pattern for usage is

client.materialize.tables.{table_name}({filter options}).query({format and timestamp options})

where {table_name} is the name of the table you want to filter, {filter options} is a collection of arguments for filtering the query, and {format and timestamp options} are those parameters controlling the format and timestamp of the query.

For example, let’s look at the table aibs_metamodel_celltypes_v661, which has cell type predictions across the dataset. We can get the whole table as a DataFrame:

cell_type_df = client.materialize.tables.aibs_metamodel_celltypes_v661().query()
cell_type_df.head()

and we can add similar formatting options as in the last section to the query function:

cell_type_df = client.materialize.tables.aibs_metamodel_celltypes_v661().query(split_positions=True, desired_resolution=[1,1,1], select_columns=['pt_position', 'pt_root_id', 'cell_type'], limit=10)
cell_type_df

However, now we can also filter the table to get only cells that are predicted to have cell type "BC" (for “basket cell”).

my_cell_type = "BC"
client.materialize.tables.aibs_metamodel_celltypes_v661(cell_type=my_cell_type).query()

or maybe we just want the cell types for a particular collection of root ids:

my_root_ids = [864691135771677771, 864691135560505569, 864691136723556861]
client.materialize.tables.aibs_metamodel_celltypes_v661(pt_root_id=my_root_ids).query()

You can get a list of all parameters that can be used for querying with the standard IPython/Jupyter docstring functionality, e.g. client.materialize.tables.aibs_metamodel_celltypes_v661.

Note

Use of this functionality will show a brief warning that the interface is experimental. This is because the interface is still being developed and may change in the near future in response to user feedback.

Querying Synapses#

While synapses are stored like any other table in the database, in this case synapses_pni_2, this table is much larger than any other table at more than 337 million rows, and it works best when queried in a different way. The synapse_query function allows you to query the synapse table in a more convenient way than most other tables. In particular, the pre_ids and post_ids arguments let you specify which root id (or collection of root ids) you want to query, with pre_ids indicating the collection of presynaptic neurons and post_ids the collection of postsynaptic neurons. Using both pre_ids and post_ids in one call is effectively a logical AND, returning only those synapses from neurons in the list of pre_ids that target neurons in the list of post_ids. Let’s look at one particular example.

my_root_id = 864691135808473885
syn_df = client.materialize.synapse_query(pre_ids=my_root_id)
print(f"Total number of output synapses for {my_root_id}: {len(syn_df)}")
syn_df.head()

Note that synapse queries always return the list of every synapse between the neurons in the query, even if there are multiple synapses between the same pair of neurons.

A common pattern to generate a list of connections between unique pairs of neurons is to group by the root ids of the presynaptic and postsynaptic neurons and then count the number of synapses between them. For example, to get the number of synapses from this neuron onto every other neuron, ordered from the most synapses to the fewest:

syn_df.groupby(
  ['pre_pt_root_id', 'post_pt_root_id']
).count()[['id']].rename(
  columns={'id': 'syn_count'}
).sort_values(
  by='syn_count',
  ascending=False,
)
# Note that the 'id' part here is just a way to quickly extract one column.
# This could be any of the remaining column names, but `id` is often convenient because it is common to all tables.