Merge pull request #67 from visdesignlab/minor-language-update

minor language changes for intersection information
visdesignlab · Sep 5, 2024 · 94f3f11 · 94f3f11
2 parents 6a71a5c + 3064d77
commit 94f3f11
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 30 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,7 +24,7 @@ no_implicit_reexport = true
 
 [project]
 name = "upset-alttxt"
-version = "0.3.2"
+version = "0.3.3"
 description = "Generates alt text for UpSet plots"
 readme = "README.md"
 requires-python = ">=3.8"
diff --git a/src/alttxt/phrases.py b/src/alttxt/phrases.py
@@ -107,7 +107,7 @@
         "short_description": "This is an UpSet plot which shows the intersections of {{visible_set_count}} sets."
         " [[learn_more]]."
         " {{highest_dominant_set}}."
-        " The largest intersection is {{max_intersection_name}}, with {{max_intersection_size}} {{set_description}}."
+        " The largest non-empty intersection is {{max_intersection_name}}, with {{max_intersection_size}} {{set_description}}."
         " Other large intersections also involve {{large_sets}}."
         " {{all_set_index}}",
 

diff --git a/src/alttxt/tokenmap.py b/src/alttxt/tokenmap.py
@@ -19,7 +19,7 @@ class TokenMap:
     it is not responsible for the actual substitution of tokens
     or the overall generation of the text description.
     """
-    TRUNCATION_LENGTH = 15  # Global constant for truncation length
+    TRUNCATION_LENGTH = 19  # Global constant for truncation length
 
     def __init__(
         self, data: DataModel, grammar: GrammarModel, title: Optional[str] = None
@@ -396,15 +396,19 @@ def max_n_intersections(self, n: int) -> str:
                 break
 
             formatted_names = self.truncate_separately(sort[i].name)
-            result += f"{formatted_names} ({sort[i].size}), "
-
-            if i == n - 2:
-                result += "and "
+            result += f"{formatted_names} ({sort[i].size}); "
+
+        result = result.rstrip("; ")
+        parts = result.split("; ")
+
+        if len(parts) > 2:
+            result = "; ".join(parts[:-1]) + "; and " + parts[-1]
+        elif len(parts) == 2:
+            result = " and ".join(parts)
 
-        # Trim the trailing ', '
         if len(sort) < n:
-            return f"The largest {len(sort)} intersections are {result[:-2]}"
-        return f"The largest {n} intersections are {result[:-2]}"
+            return f"The largest {len(sort)} intersections are {result}"
+        return f"The largest {n} intersections are {result}"
 
     def degree_count(self) -> str:
         """
@@ -610,7 +614,26 @@ def calculate_max_intersection(self) -> dict[str, int]:
 
         # 'largest_subset' now holds the subset with more than one set that has the largest size
         if largest_subset is not None:
-            return self.truncate_separately(largest_subset.name), largest_subset.size
+            truncated_names = []
+            sorted_subset = largest_subset.name
+
+            for name in sorted_subset.split(', '):
+                if name == "the empty intersection":
+                    truncated_names.append(name)
+                else:
+                    truncated_names.append(self.truncate_string(name))
+
+
+            if len(truncated_names) == 2:
+                formatted_names = "between " + " and ".join(truncated_names)
+            elif len(truncated_names) > 2:
+                formatted_names = "between " + ", ".join(truncated_names[:-1]) + ", and " + truncated_names[-1]
+            else:
+                formatted_names = "Just " + truncated_names[0]
+                formatted_names = truncated_names[0] if truncated_names[0] == "the empty intersection" else "Just " + truncated_names[0]
+
+            return formatted_names, largest_subset.size
+
         else:
             return None, 0
 
@@ -636,12 +659,9 @@ def calculate_min_intersection(self) -> dict[str, int]:
 
 
     def calculate_set_divergence(self):
-        # Assuming self.data.subsets is a list of Subset objects with a 'size' attribute
-        # First, find the max and min set sizes
         max_set_size = self.sort_visible_sets()[0][1]
         min_set_size = self.sort_visible_sets()[-1][1]
 
-        # Calculate the divergence percentage
         divergence_percentage = (min_set_size / max_set_size) * 100
 
         # Determine the divergence category
@@ -651,7 +671,7 @@ def calculate_set_divergence(self):
             return IndividualSetSize.DIVERGING.value
         elif 53.35 <= divergence_percentage <= 79.99:
             return IndividualSetSize.DIVERGINGABIT.value
-        else: # divergence_percentage >= 80
+        else:
             return IndividualSetSize.IDENTICAL.value
 
 
@@ -664,8 +684,7 @@ def calculate_change_trend(self):
         mean_size = statistics.mean(intersection_sizes)
 
         # Determine the trend based on the standard deviation
-        # Adjust the threshold as necessary for your specific data and requirements
-        threshold = 0.1  # Example threshold for deciding between gradual and drastic
+        threshold = 0.1  
         relative_std_dev = std_dev / mean_size
 
         if std_dev == 0:
@@ -676,14 +695,11 @@ def calculate_change_trend(self):
             return IntersectionTrend.DRASTIC.value
 
     def calculate_largest_factor(self):
-        # Ensure the list is sorted in descending order
         sorted_sizes = self.sort_subsets_by_key(SubsetField.SIZE, True)
-        # Calculate the factor
         if len(sorted_sizes) >= 2:
             largest_size = sorted_sizes[0].size
             second_largest_size = sorted_sizes[1].size
 
-            # Avoid division by zero
             if second_largest_size > 0:
                 factor = largest_size / second_largest_size
                 decimal_part = factor - math.floor(factor)
@@ -941,14 +957,13 @@ def find_dominant_sets(self, visible_sets):
 
     def find_sets_in_large_subsets(self):
 
-        # Sort subsets by size in descending order
         sorted_subsets = sorted(self.data.subsets, key=lambda subset: subset.size, reverse=True)
 
-        # Check the top two largest subsets and remove them if they have empty setMembership
+        # Check the top two largest subsets and remove them if they have empty setMembership. Remove the second and third largest, if empty
         if len(sorted_subsets) > 1 and len(sorted_subsets[1].setMembership) == 0:
-            sorted_subsets.pop(1)  # Remove the second largest if empty
+            sorted_subsets.pop(1) 
         elif len(sorted_subsets) > 2 and len(sorted_subsets[2].setMembership) == 0:
-            sorted_subsets.pop(2)  # Remove the third largest if empty
+            sorted_subsets.pop(2)  
 
         # Extract set names from the 2nd largest subset
         second_largest_sets = sorted_subsets[1].setMembership
@@ -967,7 +982,6 @@ def find_sets_in_large_subsets(self):
             for sm in second_largest_sets:
                 sets.append(sm)
 
-         # Formatting the return value based on the size of the sets list
         if len(sets) == 2:
             return f"{self.truncate_string(sets[0])} and {self.truncate_string(sets[1])}"
         elif len(sets) > 2:
@@ -977,7 +991,6 @@ def find_sets_in_large_subsets(self):
 
 
     def get_all_set_position(self):
-    # Sort subsets by size in descending order
         sorted_subsets = sorted(self.data.subsets, key=lambda subset: subset.size, reverse=True)
 
         # Find the "all set" intersection if it exists
@@ -988,8 +1001,8 @@ def get_all_set_position(self):
                 all_set_index = index
                 break
 
+        # Determine the position of the "all set" intersection
         if all_set_index is not None:
-            # Determine the position of the "all set" intersection
             total_subsets = len(sorted_subsets)
             if all_set_index == 0:
                 return f"The intersection of all sets is the largest with {all_set_size} elements."
@@ -1010,7 +1023,6 @@ def get_all_set_position(self):
 
 
     def truncate_string(self, original_string):
-            # Ensure the length is not greater than the string's length
         if original_string.lower().startswith('just '):
             original_string = original_string[5:]
         if original_string.lower().startswith('and '):
@@ -1038,8 +1050,21 @@ def truncate_separately(self, sorted_subset):
         Returns:
         str: A string of formatted and truncated set names.
         """
-        truncated_names = [self.truncate_string(name) for name in sorted_subset.split(', ')]
-        formatted_names = ", ".join(truncated_names[:-1]) + ", and " + truncated_names[-1] if len(truncated_names) > 1 else "Just " + truncated_names[0]
+        truncated_names = []
+
+        for name in sorted_subset.split(', '):
+            if name == "the empty intersection":
+                truncated_names.append(name)
+            else:
+                truncated_names.append(self.truncate_string(name))
+
+
+        if len(truncated_names) == 2:
+            formatted_names = " and ".join(truncated_names)
+        elif len(truncated_names) > 2:
+            formatted_names = ", ".join(truncated_names[:-1]) + ", and " + truncated_names[-1]
+        else:
+            formatted_names = truncated_names[0] if truncated_names[0] == "the empty intersection" else "just " + truncated_names[0]
 
         return formatted_names