File size: 12,626 Bytes
9c6594c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 |
"""Functions for computing measures of structural holes."""
import networkx as nx
__all__ = ["constraint", "local_constraint", "effective_size"]
@nx._dispatchable(edge_attrs="weight")
def mutual_weight(G, u, v, weight=None):
"""Returns the sum of the weights of the edge from `u` to `v` and
the edge from `v` to `u` in `G`.
`weight` is the edge data key that represents the edge weight. If
the specified key is `None` or is not in the edge data for an edge,
that edge is assumed to have weight 1.
Pre-conditions: `u` and `v` must both be in `G`.
"""
try:
a_uv = G[u][v].get(weight, 1)
except KeyError:
a_uv = 0
try:
a_vu = G[v][u].get(weight, 1)
except KeyError:
a_vu = 0
return a_uv + a_vu
@nx._dispatchable(edge_attrs="weight")
def normalized_mutual_weight(G, u, v, norm=sum, weight=None):
"""Returns normalized mutual weight of the edges from `u` to `v`
with respect to the mutual weights of the neighbors of `u` in `G`.
`norm` specifies how the normalization factor is computed. It must
be a function that takes a single argument and returns a number.
The argument will be an iterable of mutual weights
of pairs ``(u, w)``, where ``w`` ranges over each (in- and
out-)neighbor of ``u``. Commons values for `normalization` are
``sum`` and ``max``.
`weight` can be ``None`` or a string, if None, all edge weights
are considered equal. Otherwise holds the name of the edge
attribute used as weight.
"""
scale = norm(mutual_weight(G, u, w, weight) for w in set(nx.all_neighbors(G, u)))
return 0 if scale == 0 else mutual_weight(G, u, v, weight) / scale
@nx._dispatchable(edge_attrs="weight")
def effective_size(G, nodes=None, weight=None):
r"""Returns the effective size of all nodes in the graph ``G``.
The *effective size* of a node's ego network is based on the concept
of redundancy. A person's ego network has redundancy to the extent
that her contacts are connected to each other as well. The
nonredundant part of a person's relationships is the effective
size of her ego network [1]_. Formally, the effective size of a
node $u$, denoted $e(u)$, is defined by
.. math::
e(u) = \sum_{v \in N(u) \setminus \{u\}}
\left(1 - \sum_{w \in N(v)} p_{uw} m_{vw}\right)
where $N(u)$ is the set of neighbors of $u$ and $p_{uw}$ is the
normalized mutual weight of the (directed or undirected) edges
joining $u$ and $v$, for each vertex $u$ and $v$ [1]_. And $m_{vw}$
is the mutual weight of $v$ and $w$ divided by $v$ highest mutual
weight with any of its neighbors. The *mutual weight* of $u$ and $v$
is the sum of the weights of edges joining them (edge weights are
assumed to be one if the graph is unweighted).
For the case of unweighted and undirected graphs, Borgatti proposed
a simplified formula to compute effective size [2]_
.. math::
e(u) = n - \frac{2t}{n}
where `t` is the number of ties in the ego network (not including
ties to ego) and `n` is the number of nodes (excluding ego).
Parameters
----------
G : NetworkX graph
The graph containing ``v``. Directed graphs are treated like
undirected graphs when computing neighbors of ``v``.
nodes : container, optional
Container of nodes in the graph ``G`` to compute the effective size.
If None, the effective size of every node is computed.
weight : None or string, optional
If None, all edge weights are considered equal.
Otherwise holds the name of the edge attribute used as weight.
Returns
-------
dict
Dictionary with nodes as keys and the effective size of the node as values.
Notes
-----
Isolated nodes, including nodes which only have self-loop edges, do not
have a well-defined effective size::
>>> G = nx.path_graph(3)
>>> G.add_edge(4, 4)
>>> nx.effective_size(G)
{0: 1.0, 1: 2.0, 2: 1.0, 4: nan}
Burt also defined the related concept of *efficiency* of a node's ego
network, which is its effective size divided by the degree of that
node [1]_. So you can easily compute efficiency:
>>> G = nx.DiGraph()
>>> G.add_edges_from([(0, 1), (0, 2), (1, 0), (2, 1)])
>>> esize = nx.effective_size(G)
>>> efficiency = {n: v / G.degree(n) for n, v in esize.items()}
See also
--------
constraint
References
----------
.. [1] Burt, Ronald S.
*Structural Holes: The Social Structure of Competition.*
Cambridge: Harvard University Press, 1995.
.. [2] Borgatti, S.
"Structural Holes: Unpacking Burt's Redundancy Measures"
CONNECTIONS 20(1):35-38.
http://www.analytictech.com/connections/v20(1)/holes.htm
"""
def redundancy(G, u, v, weight=None):
nmw = normalized_mutual_weight
r = sum(
nmw(G, u, w, weight=weight) * nmw(G, v, w, norm=max, weight=weight)
for w in set(nx.all_neighbors(G, u))
)
return 1 - r
# Check if scipy is available
try:
# Needed for errstate
import numpy as np
# make sure nx.adjacency_matrix will not raise
import scipy as sp
has_scipy = True
except:
has_scipy = False
if nodes is None and has_scipy:
# In order to compute constraint of all nodes,
# algorithms based on sparse matrices can be much faster
# Obtain the adjacency matrix
P = nx.adjacency_matrix(G, weight=weight)
# Calculate mutual weights
mutual_weights1 = P + P.T
mutual_weights2 = mutual_weights1.copy()
with np.errstate(divide="ignore"):
# Mutual_weights1 = Normalize mutual weights by row sums
mutual_weights1 /= mutual_weights1.sum(axis=1)[:, np.newaxis]
# Mutual_weights2 = Normalize mutual weights by row max
mutual_weights2 /= mutual_weights2.max(axis=1).toarray()
# Calculate effective sizes
r = 1 - (mutual_weights1 @ mutual_weights2.T).toarray()
effective_size = ((mutual_weights1 > 0) * r).sum(axis=1)
# Special treatment: isolated nodes (ignoring selfloops) marked with "nan"
sum_mutual_weights = mutual_weights1.sum(axis=1) - mutual_weights1.diagonal()
isolated_nodes = sum_mutual_weights == 0
effective_size[isolated_nodes] = float("nan")
# Use tolist() to automatically convert numpy scalars -> Python scalars
return dict(zip(G, effective_size.tolist()))
# Results for only requested nodes
effective_size = {}
if nodes is None:
nodes = G
# Use Borgatti's simplified formula for unweighted and undirected graphs
if not G.is_directed() and weight is None:
for v in nodes:
# Effective size is not defined for isolated nodes, including nodes
# with only self-edges
if all(u == v for u in G[v]):
effective_size[v] = float("nan")
continue
E = nx.ego_graph(G, v, center=False, undirected=True)
effective_size[v] = len(E) - (2 * E.size()) / len(E)
else:
for v in nodes:
# Effective size is not defined for isolated nodes, including nodes
# with only self-edges
if all(u == v for u in G[v]):
effective_size[v] = float("nan")
continue
effective_size[v] = sum(
redundancy(G, v, u, weight) for u in set(nx.all_neighbors(G, v))
)
return effective_size
@nx._dispatchable(edge_attrs="weight")
def constraint(G, nodes=None, weight=None):
r"""Returns the constraint on all nodes in the graph ``G``.
The *constraint* is a measure of the extent to which a node *v* is
invested in those nodes that are themselves invested in the
neighbors of *v*. Formally, the *constraint on v*, denoted `c(v)`,
is defined by
.. math::
c(v) = \sum_{w \in N(v) \setminus \{v\}} \ell(v, w)
where $N(v)$ is the subset of the neighbors of `v` that are either
predecessors or successors of `v` and $\ell(v, w)$ is the local
constraint on `v` with respect to `w` [1]_. For the definition of local
constraint, see :func:`local_constraint`.
Parameters
----------
G : NetworkX graph
The graph containing ``v``. This can be either directed or undirected.
nodes : container, optional
Container of nodes in the graph ``G`` to compute the constraint. If
None, the constraint of every node is computed.
weight : None or string, optional
If None, all edge weights are considered equal.
Otherwise holds the name of the edge attribute used as weight.
Returns
-------
dict
Dictionary with nodes as keys and the constraint on the node as values.
See also
--------
local_constraint
References
----------
.. [1] Burt, Ronald S.
"Structural holes and good ideas".
American Journal of Sociology (110): 349β399.
"""
# Check if scipy is available
try:
# Needed for errstate
import numpy as np
# make sure nx.adjacency_matrix will not raise
import scipy as sp
has_scipy = True
except:
has_scipy = False
if nodes is None and has_scipy:
# In order to compute constraint of all nodes,
# algorithms based on sparse matrices can be much faster
# Obtain the adjacency matrix
P = nx.adjacency_matrix(G, weight=weight)
# Calculate mutual weights
mutual_weights = P + P.T
# Normalize mutual weights by row sums
sum_mutual_weights = mutual_weights.sum(axis=1)
with np.errstate(divide="ignore"):
mutual_weights /= sum_mutual_weights[:, np.newaxis]
# Calculate local constraints and constraints
local_constraints = (mutual_weights + mutual_weights @ mutual_weights) ** 2
constraints = ((mutual_weights > 0) * local_constraints).sum(axis=1)
# Special treatment: isolated nodes marked with "nan"
isolated_nodes = sum_mutual_weights - 2 * mutual_weights.diagonal() == 0
constraints[isolated_nodes] = float("nan")
# Use tolist() to automatically convert numpy scalars -> Python scalars
return dict(zip(G, constraints.tolist()))
# Result for only requested nodes
constraint = {}
if nodes is None:
nodes = G
for v in nodes:
# Constraint is not defined for isolated nodes
if len(G[v]) == 0:
constraint[v] = float("nan")
continue
constraint[v] = sum(
local_constraint(G, v, n, weight) for n in set(nx.all_neighbors(G, v))
)
return constraint
@nx._dispatchable(edge_attrs="weight")
def local_constraint(G, u, v, weight=None):
r"""Returns the local constraint on the node ``u`` with respect to
the node ``v`` in the graph ``G``.
Formally, the *local constraint on u with respect to v*, denoted
$\ell(u, v)$, is defined by
.. math::
\ell(u, v) = \left(p_{uv} + \sum_{w \in N(v)} p_{uw} p_{wv}\right)^2,
where $N(v)$ is the set of neighbors of $v$ and $p_{uv}$ is the
normalized mutual weight of the (directed or undirected) edges
joining $u$ and $v$, for each vertex $u$ and $v$ [1]_. The *mutual
weight* of $u$ and $v$ is the sum of the weights of edges joining
them (edge weights are assumed to be one if the graph is
unweighted).
Parameters
----------
G : NetworkX graph
The graph containing ``u`` and ``v``. This can be either
directed or undirected.
u : node
A node in the graph ``G``.
v : node
A node in the graph ``G``.
weight : None or string, optional
If None, all edge weights are considered equal.
Otherwise holds the name of the edge attribute used as weight.
Returns
-------
float
The constraint of the node ``v`` in the graph ``G``.
See also
--------
constraint
References
----------
.. [1] Burt, Ronald S.
"Structural holes and good ideas".
American Journal of Sociology (110): 349β399.
"""
nmw = normalized_mutual_weight
direct = nmw(G, u, v, weight=weight)
indirect = sum(
nmw(G, u, w, weight=weight) * nmw(G, w, v, weight=weight)
for w in set(nx.all_neighbors(G, u))
)
return (direct + indirect) ** 2
|