Source code for sacc.data_types

import warnings

from astropy.table import Table

from .utils import Namespace, hide_null_values, null_values


# Tags that a data point of each known data type must carry, keyed by the
# data type name.  DataPoint checks this mapping when a point is created.
# Keep the insertion order stable: known_types is built from these keys.
required_tags = {
    # Data types that require a "theta" tag
    "galaxy_shear_xi_plus": ["theta"],
    "galaxy_shear_xi_minus": ["theta"],
    "galaxy_shear_xi_plus_imaginary": ["theta"],
    "galaxy_shear_xi_minus_imaginary": ["theta"],
    "galaxy_density_w": ["theta"],
    "ggl_gamma_t": ["theta"],
    "ggl_gamma_x": ["theta"],
    # Data types that require an "ell" tag
    "galaxy_shear_ee": ["ell"],
    "galaxy_shear_bb": ["ell"],
    "galaxy_shear_eb": ["ell"],
    "galaxy_density_cl": ["ell"],
    "ggl_E": ["ell"],
    "ggl_B": ["ell"],
}

# known_types is a namespace object built from the data type names, so:
#   known_types.ggl_E == "ggl_E"
# For convenience it also supports known_types.index('ggl_E')
# and the membership test 'ggl_E' in known_types.

known_types = Namespace(*required_tags)


class DataPoint:
    """A class for a single data point (one scalar value).

    Data points have a type, zero or more tracers, a value,
    and any arbitrary tags that are stored in a dictionary,
    and can be used to describe angular scales, window functions,
    or any arbitrary information to be attached to the data.

    Data points can be automatically created and added to a
    Sacc object, so you don't normally need to manually create them.

    Attributes
    ----------
    data_type: str
        A string, indicating the type of data point

    tracers: tuple
        Tuple of strings with the names of tracers to use

    value: float
        Mean value of this statistic

    tags: dict
        Dictionary of further data point metadata, such as binning info, angles, etc.

    """
    def __init__(self, data_type, tracers, value, ignore_missing_tags=False, **tags):
        """Create a new data point.

        Data points can be automatically created and added to a
        Sacc object, so you don't normally need to manually create them.

        Parameters
        ----------
        data_type: str
            A string, indicating the type of data point

        tracers: tuple
            Tuple of strings with the names of tracers to use

        value: float
            Mean value of this statistic

        ignore_missing_tags: bool
            Optional, default=False.  If True, do not complain if a tag usually
            needed for this data type is not present.

        **tags: dict[str:any]
            Dictionary of further data point metadata, such as binning info, angles, etc.

        Raises
        ------
        ValueError
            If data_type has required tags, one of them is missing from
            tags, and ignore_missing_tags is False.
        """
        self.data_type = data_type
        self.tracers = tracers
        self.value = value
        self.tags = tags
        # Data types can have required tags which must be present.
        # Check for those here
        if (data_type in required_tags) and (not ignore_missing_tags):
            for tag in required_tags[data_type]:
                if tag not in tags:
                    raise ValueError(f"Tag {tag} required for data type {data_type} (ignore_missing_tags=False)")

        # We encourage people to use existing type names, and issue a warning if they do
        # not to prod them in the right direction.
        # We are removing this warning until we converge on what the data types should be
        # if data_type not in known_types:
        #     warnings.warn(f"Unknown data_type value {data_type}. If possible use a pre-defined type, or add to the list.")

    def __repr__(self):
        """Return a string showing the type, tracers, value and tags."""
        t = ", ".join(f'{k}={v}' for (k, v) in self.tags.items())
        return f"DataPoint(data_type='{self.data_type}', tracers={self.tracers}, value={self.value}, {t})"

    def get_tag(self, tag, default=None):
        """
        Get the value of the named tag, or a default if it is not found.

        Parameters
        ----------
        tag: str
            Tag to find on the data point

        default: any
            Value to return if the tag is not found.  Default is None.

        Returns
        -------
        value: any
            Value of the tag in this data point, or default if absent
        """
        return self.tags.get(tag, default)

    def __getitem__(self, tag):
        """
        Get the value of the named tag, raising an
        error if it is not found

        Parameters
        ----------
        tag: str
            Tag to find on the data point

        Returns
        -------
        value: any
            Value of the tag in this data point

        Raises
        ------
        KeyError
            If the tag is not present on this data point
        """
        return self.tags[tag]

    @staticmethod
    def _choose_fields(data):
        """
        Internal static method to generate a list of column names from a list
        of data points.  Since the data points can be heterogeneous this
        is not quite trivial - we use the union of the tag names and tracer_0,
        tracer_1, etc. up to the max number of tracers.

        Parameters
        ----------
        data: list
            A list of DataPoint objects

        Returns
        -------
        tracers: list
            Column names tracer_0 ... tracer_{n-1}, where n is the largest
            number of tracers on any data point

        tags: list
            Sorted list of every tag name used by any data point
        """
        tags = set()
        ntracer = 0
        for d in data:
            ntracer = max(ntracer, len(d.tracers))
            tags.update(d.tags)
        # Sort so that the column order is the same on every run; plain set
        # iteration order for strings changes with hash randomisation.
        tags = sorted(tags)
        tracers = [f'tracer_{i}' for i in range(ntracer)]
        return tracers, tags

    @classmethod
    def to_table(cls, data, lookups=None):
        """
        Convert a list of data points to a single homogeneous table

        Since data points can have varying tags, this method uses
        null values to represent non-present tags.

        Parameters
        ----------
        data: list
            A list of DataPoint objects

        lookups: dict
            A dictionary of tags->dict showing replacements to make
            in the tags. Default is None, meaning no replacements.

        Returns
        -------
        table: astropy.table.Table
            table object containing data points
        """
        if lookups is None:
            lookups = {}

        # Get the names of the columns to generate
        tracers, tags = cls._choose_fields(data)
        names = tracers + ['value'] + tags

        # Convert each data point to a row
        rows = [d._make_row(tracers, tags, lookups) for d in data]

        # Convert to a table and fiddle slightly.
        table = Table(rows=rows, names=names)
        # from_table needs the tracer count to locate the tag columns
        table.meta['NTRACER'] = len(tracers)
        hide_null_values(table)
        return table

    @classmethod
    def from_table(cls, table, lookups=None):
        """Convert a table back into a list of data points.

        This method removes null values from the tags.

        Parameters
        ----------
        table: astropy.table.Table
            A table of data containing the tracers, values, and tags.
            Its metadata must contain the NTRACER and SACCNAME entries.

        lookups: dict
            A dictionary of tags->dict showing replacements to make
            in the tags. Default is None, meaning no replacements.

        Returns
        -------
        data: list
            list of DataPoint objects
        """
        if lookups is None:
            lookups = {}

        # Get out required table metadata
        nt = table.meta['NTRACER']
        data_type = table.meta['SACCNAME']

        # Columns are laid out as tracers, then 'value', then tags.
        # Tag names - we will remove missing tags below
        tag_names = table.colnames[nt + 1:]
        data = []
        for row in table:
            # Get basic data elements
            tracers = tuple(row[f'tracer_{i}'] for i in range(nt))
            value = row['value']

            # Deal with tags.  First just pull out all remaining columns
            tags = {name: row[name] for name in tag_names}
            # Iterate over a copy of the items because tags is edited in the loop
            for k, v in list(tags.items()):
                # Deal with any tags that we should replace.
                # This is mainly used for Window instances.
                if k in lookups:
                    tags[k] = lookups[k].get(v, v)
                # Now delete any null values, as indicated by the sentinel
                # for the value's dtype kind.
                if hasattr(tags[k], 'dtype') and v == null_values[tags[k].dtype.kind]:
                    del tags[k]
            # Finally convert back to a data point and record.
            # Required-tag checking is skipped here: it is a creation-time
            # check, and repeating it would make points that were created
            # with ignore_missing_tags=True impossible to read back.
            data_point = cls(data_type, tracers, value,
                             ignore_missing_tags=True, **tags)
            data.append(data_point)
        return data

    def _make_row(self, tracers, tags, lookups):
        """
        Turn this data point into a list with specified tracers and tags.
        If some tracers or tags are missing (heterogeneous data set) then
        use empty strings for the tracers and None for the tags.

        Parameters
        ----------
        tracers: list
            Tracer column names; only the length is used

        tags: list
            Tag names, in the column order wanted

        lookups: dict
            A dictionary of tags->dict showing replacements to make
            in the tag values

        Returns
        -------
        row: list
            Tracer names, then the value, then one entry per tag
        """
        nt = len(tracers)
        missing = nt - len(self.tracers)
        # Pad with empty strings so every row has the same number of tracers
        row = list(self.tracers) + [""] * missing
        row.append(self.value)
        for t in tags:
            v = self.tags.get(t)
            lookup = lookups.get(t)
            if lookup is not None:
                v = lookup.get(v, v)
            row.append(v)
        return row