How to Build Knowledge Graph Generation Pipelines From Text With kg-gen, NetworkX Analytics, and Interactive Visualizations

How to Build Knowledge Graph Generation Pipelines From Text With kg-gen, NetworkX Analytics, and Interactive Visualizations


# Section banner: a 70-character rule above and below the title.
print("\n" + "=" * 70 + "\n SECTION 6 — NetworkX analytics\n" + "=" * 70)
def kg_to_networkx(graph):
    """Convert a knowledge graph into a ``networkx.MultiDiGraph``.

    Args:
        graph: Object exposing ``entities`` (an iterable of node
            identifiers) and ``relations`` (an iterable of 3-item
            sequences unpacked as subject, predicate, object).

    Returns:
        A ``MultiDiGraph`` with one node per entity and one directed
        edge subject -> object per relation, the predicate stored in
        the edge's ``label`` attribute.
    """
    G = nx.MultiDiGraph()
    # Add entities first so that nodes without any relation are kept.
    for entity in graph.entities:
        G.add_node(entity)
    for subject, predicate, obj in graph.relations:
        G.add_edge(subject, obj, label=predicate)
    return G
# Build the directed multigraph for the extracted knowledge graph.
G = kg_to_networkx(g_big)
print(f"Nodes: {G.number_of_nodes()} Edges: {G.number_of_edges()}")

# Degree and betweenness centrality are computed on an undirected view.
H = nx.Graph(G)
deg_cent = nx.degree_centrality(H)
btw_cent = nx.betweenness_centrality(H)

# PageRank runs on a plain DiGraph built from G; with no edges at all
# the scores are left as an empty dict instead of calling pagerank.
pr_cent = nx.pagerank(nx.DiGraph(G)) if G.number_of_edges() else {}
def top(d, k=8):
    """Return the ``k`` highest-valued ``(key, value)`` pairs of ``d``.

    Pairs are ordered from largest to smallest value. Because the sort
    is stable, entries with equal values keep the mapping's own
    iteration order.
    """
    def descending(pair):
        # Negate the value so an ascending sort yields largest-first.
        return -pair[1]

    ranked = sorted(d.items(), key=descending)
    return ranked[:k]
# Print the top-ranked entities for each centrality measure. Each row
# shows the entity name padded to 35 characters and its score to three
# decimal places.
for heading, scores in (
    ("\nTop entities by degree centrality:", deg_cent),
    ("\nTop entities by betweenness:", btw_cent),
    ("\nTop entities by PageRank:", pr_cent),
):
    print(heading)
    for n, v in top(scores):
        print(f" {n:35s} {v:.3f}")
# Community detection on the undirected graph H.
# Prefer NetworkX's own Louvain implementation. Fall back to the
# separate `community` module only when that import is unavailable, so
# genuine errors raised by the Louvain call itself are not masked by a
# silent switch of library.
try:
    from networkx.algorithms.community import louvain_communities
except ImportError:
    import community as community_louvain

    # best_partition returns a node -> community-id mapping; regroup it
    # into a list of node sets to match louvain_communities' output.
    parts = community_louvain.best_partition(H, random_state=42)
    bins = {}
    for n, c in parts.items():
        bins.setdefault(c, set()).add(n)
    communities = list(bins.values())
else:
    communities = louvain_communities(H, seed=42)
# List every detected community with its members in sorted order.
print(f"\nDetected {len(communities)} communities:")
for i, c in enumerate(communities):
    print(f" Community {i}: {sorted(c)}")

# Relations are unpacked as (subject, predicate, object) by
# kg_to_networkx, so the predicate is the MIDDLE element of each triple.
# Counting the last element would tally objects instead.
pred_counts = Counter(p for _, p, _ in g_big.relations)
print("\nMost common predicates:")
for p, n in pred_counts.most_common(10):
    print(f" {n:3d} {p}")
# Section banner: a 70-character rule above and below the title.
print("\n" + "=" * 70 + "\n SECTION 7 — Custom pyvis viz\n" + "=" * 70)

# Ten colours, reused cyclically when there are more communities.
palette = [
    "#e6194B", "#3cb44b", "#ffe119", "#4363d8", "#f58231",
    "#911eb4", "#42d4f4", "#f032e6", "#bfef45", "#fabed4",
]

# Map each node to the colour of the community that contains it.
node_color = {}
for i, c in enumerate(communities):
    for n in c:
        node_color[n] = palette[i % len(palette)]
# Directed pyvis network configured with a Barnes-Hut layout.
net = Network(
    height="600px",
    width="100%",
    directed=True,
    bgcolor="#ffffff",
    font_color="#222222",
    notebook=True,
    cdn_resources="in_line",
)
net.barnes_hut(gravity=-12000, spring_length=180)

for n in G.nodes:
    # Node size grows with PageRank. Nodes without a score use 0.01 for
    # sizing and show 0 in the tooltip.
    size = 12 + 80 * pr_cent.get(n, 0.01)
    net.add_node(
        n,
        label=n,
        # Grey for nodes that belong to no detected community.
        color=node_color.get(n, "#888888"),
        size=size,
        title=f"PageRank: {pr_cent.get(n,0):.3f}",
    )

# One arrow per relation, labelled with the predicate stored on the edge.
for s, o, data in G.edges(data=True):
    net.add_edge(s, o, label=data.get("label", ""), arrows="to")
# Write the visualization to an HTML file without opening a browser,
# then embed that file inline through an IFrame.
pyvis_path = "kg_pyvis.html"
net.write_html(pyvis_path, notebook=False, open_browser=False)
print(f"Wrote {pyvis_path}")
display(IFrame(pyvis_path, width="100%", height=620))



Source link

Leave a Reply

Your email address will not be published. Required fields are marked *

Pin It on Pinterest