采用基于网络嵌入学习方法 Node2Vec 的特征提取方法,对“DM、NM、WDNM”进行特征提取;并生成相应的类加权图,在图中标注出边的权重、方向以及节点强度等信息。

步骤一:环境准备

1.软件系统数据获取

首先,需要确保已经正确下载并解压了所有指定版本的Java开源软件系统:

# Create the project directory and download the software systems.
# NOTE(review): the Ant and Log4j archives below are "-bin" distributions; confirm
# they contain the .java sources that the later parsing step walks — TODO verify.
mkdir -p java_software_systems
cd java_software_systems

# Apache Ant 1.6.1
wget https://archive.apache.org/dist/ant/binaries/ant-1.6.1-bin.tar.gz
tar -xzf ant-1.6.1-bin.tar.gz

# Hibernate 5.2.12
wget https://sourceforge.net/projects/hibernate/files/hibernate5/5.2.12.Final/hibernate-release-5.2.12.Final.tar.gz
tar -xzf hibernate-release-5.2.12.Final.tar.gz

# jEdit 5.1.0
wget https://sourceforge.net/projects/jedit/files/jEdit%205.1.0/jedit-5.1.0.tar.gz/download -O jedit-5.1.0.tar.gz
tar -xzf jedit-5.1.0.tar.gz

# JGAP 3.6.3
wget https://sourceforge.net/projects/jgap/files/jgap/3.6.3/jgap-3.6.3.zip
unzip jgap-3.6.3.zip

# JHotDraw 6.0b.1
wget https://sourceforge.net/projects/jhotdraw/files/jHotDraw%206.0b.1/jhotdraw-6.0b.1.tar.gz/download -O jhotdraw-6.0b.1.tar.gz
tar -xzf jhotdraw-6.0b.1.tar.gz

# JMeter 2.0.1
wget https://archive.apache.org/dist/jmeter/binaries/jmeter-2.0.1.zip
unzip jmeter-2.0.1.zip

# Log4j 2.10.0
wget https://archive.apache.org/dist/logging/log4j/2.10.0/apache-log4j-2.10.0-bin.zip
unzip apache-log4j-2.10.0-bin.zip

# Wro4J 1.6.3
wget https://sourceforge.net/projects/wro4j/files/wro4j-1.6.3/wro4j-1.6.3.tar.gz/download -O wro4j-1.6.3.tar.gz
tar -xzf wro4j-1.6.3.tar.gz

2.安装依赖库

# pandas and matplotlib are imported by the Python snippets in this document,
# so they are installed explicitly alongside the other dependencies.
pip install networkx numpy pandas matplotlib scikit-learn node2vec xmltodict pydot javalang

3.下载并安装 DependencyFinder

# Download DependencyFinder and save it under the name the parsing script invokes.
# NOTE(review): the "latest" link may serve an archive rather than a runnable jar —
# confirm the downloaded file works with `java -jar` before continuing.
wget https://sourceforge.net/projects/depfind/files/latest/download -O DependencyFinder.jar

步骤二:解析 Java 项目生成依赖网络

编写解析脚本

# parse_java_project.py
import os
import xmltodict
import networkx as nx
import javalang

def parse_dependency(xml_path):
    """Read a dependency XML report and return weighted edge tuples.

    The report is expected to have a ``Dependencies`` root whose
    ``Dependency`` children carry ``src``, ``tgt`` and ``type`` attributes
    and an optional ``count`` attribute.

    Args:
        xml_path: Path of the XML file to read.

    Returns:
        A list of ``(src, tgt, attrs)`` tuples, where ``attrs`` is a dict
        with the dependency ``type`` and an integer ``weight`` (the
        ``count`` attribute, or 1 when it is absent).

    Raises:
        KeyError: If the root element is not ``Dependencies`` or a required
            attribute is missing from a ``Dependency`` element.
    """
    with open(xml_path, 'r') as f:
        xml = xmltodict.parse(f.read())

    # An empty root element is parsed as None rather than as a mapping.
    root = xml['Dependencies'] or {}
    entries = root.get('Dependency') or []
    # xmltodict yields a single mapping (not a list) when there is only one
    # child element; normalise so the loop below always sees a list.
    if isinstance(entries, dict):
        entries = [entries]

    return [
        (
            dep['@src'],
            dep['@tgt'],
            {'type': dep['@type'], 'weight': int(dep.get('@count', 1))},
        )
        for dep in entries
    ]

def generate_graph(project_dir):
    """Run DependencyFinder on a project and build a directed dependency graph.

    The tool's standard output is written to ``temp.xml`` in the current
    working directory, parsed with ``parse_dependency`` and loaded into a
    ``networkx.DiGraph`` whose edges carry ``type`` and ``weight`` attributes.

    Args:
        project_dir: Path of the project to analyse.

    Returns:
        The resulting ``networkx.DiGraph``.

    Raises:
        subprocess.CalledProcessError: If the ``java`` command exits with a
            non-zero status.
        FileNotFoundError: If the ``java`` executable cannot be found.
    """
    import subprocess  # local import keeps this block self-contained

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; check=True surfaces tool failures instead of
    # silently parsing a truncated report.
    with open("temp.xml", "w") as report:
        subprocess.run(
            ["java", "-jar", "DependencyFinder.jar", project_dir],
            stdout=report,
            check=True,
        )

    deps = parse_dependency("temp.xml")
    G = nx.DiGraph()
    G.add_edges_from(deps)
    return G

def extract_class_metrics(java_source_dir, qualified=False):
    """Collect method and field counts for every top-level Java type.

    Walks ``java_source_dir`` recursively, parses each ``.java`` file with
    javalang and records one row per top-level type declaration. Files that
    cannot be read or parsed are reported on stdout and skipped.

    Args:
        java_source_dir: Root directory to search for ``.java`` files.
        qualified: When true, prefix each type name with the file's package
            name (``pkg.Name``); the default keeps the bare type name.

    Returns:
        A ``pandas.DataFrame`` with the columns ``class``, ``NumMethods``
        and ``NumAttributes`` (empty when nothing was parsed).
    """
    import pandas as pd  # pandas is not imported at the top of this script

    metrics = []
    for root, _, files in os.walk(java_source_dir):
        for file in files:
            if not file.endswith(".java"):
                continue
            # Reading happens inside the try so an undecodable file is
            # skipped like an unparsable one instead of aborting the walk.
            try:
                with open(os.path.join(root, file), "r") as f:
                    code = f.read()
                tree = javalang.parse.parse(code)
                package = tree.package.name if tree.package else ""
                rows = []
                # A file may declare several (or zero) top-level types.
                for type_decl in tree.types:
                    name = type_decl.name
                    if qualified and package:
                        name = f"{package}.{name}"
                    rows.append({
                        "class": name,
                        "NumMethods": len(type_decl.methods),
                        "NumAttributes": len(type_decl.fields),
                    })
            except Exception as e:  # best-effort: one bad file must not stop the scan
                print(f"Error parsing {file}: {e}")
                continue
            metrics.extend(rows)
    return pd.DataFrame(metrics)

# Example invocation: build the dependency graph and the per-class metrics table.
G = generate_graph("/path/to/Ant-1.6.1")
dm_df = extract_class_metrics("/path/to/Ant-1.6.1/src/main/java")

步骤三:Node2Vec 嵌入学习

训练嵌入

from node2vec import Node2Vec

# Assumes G has already been produced by generate_graph
node2vec = Node2Vec(G, dimensions=32, walk_length=30, num_walks=200, p=0.25, q=2)
model = node2vec.fit(window=10, min_count=1)
embeddings = model.wv  # node embedding vectors

步骤四:特征提取(DM、NM、WDNM)

DM

import pandas as pd

def calculate_dm(dm_df):
    """Return the DM table indexed by its ``class`` column.

    Args:
        dm_df: DataFrame that contains a ``class`` column.

    Returns:
        A new DataFrame whose index is the former ``class`` column.
    """
    indexed = dm_df.set_index(keys="class")
    return indexed

# Re-index the DM table by its "class" column.
dm_df = calculate_dm(dm_df)

NM

def calculate_nm(G):
    """Compute unweighted network metrics for every node of ``G``.

    Args:
        G: A networkx graph.

    Returns:
        A ``pandas.DataFrame`` indexed by node with three positional columns:
        0 = degree, 1 = betweenness centrality, 2 = eigenvector centrality.
        An empty graph yields an empty DataFrame.
    """
    if G.number_of_nodes() == 0:
        # eigenvector_centrality rejects the null graph; return the same
        # empty table the per-node loop would have produced.
        return pd.DataFrame({}).T

    # Each centrality call already covers the whole graph, so compute it once
    # instead of once per node.
    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality(G)

    nm = {
        node: [G.degree(node), betweenness[node], eigenvector[node]]
        for node in G.nodes
    }
    return pd.DataFrame(nm).T

# Per-node degree, betweenness and eigenvector centrality for the graph G.
nm_df = calculate_nm(G)

WDNM

def calculate_wdnm(G):
    """Compute weighted, directed network metrics for every node of ``G``.

    Args:
        G: A networkx directed graph whose edges may carry a ``weight``
            attribute (edges without one count as weight 1).

    Returns:
        A ``pandas.DataFrame`` indexed by node with two positional columns:
        0 = sum of outgoing edge weights, 1 = betweenness centrality computed
        with ``weight='weight'``.
    """
    # Weighted out-degree is the sum of outgoing edge weights.
    weighted_out_degree = dict(G.out_degree(weight='weight'))
    # One whole-graph computation instead of one per node.
    # NOTE(review): betweenness treats the weight as a path length, so heavier
    # dependencies count as "farther" — confirm this is the intended reading.
    weighted_betweenness = nx.betweenness_centrality(G, weight='weight')

    wdnm = {
        node: [weighted_out_degree[node], weighted_betweenness[node]]
        for node in G.nodes
    }
    return pd.DataFrame(wdnm).T

# Per-node weighted out-degree and weighted betweenness for the graph G.
wdnm_df = calculate_wdnm(G)

合并特征

# nm_df and wdnm_df both use positional column labels (0, 1, ...); prefix them
# so the merged feature table has unique column names.
features = pd.concat(
    [dm_df, nm_df.add_prefix("nm_"), wdnm_df.add_prefix("wdnm_")],
    axis=1,
)

步骤五:定义节点强度

补充节点强度

import numpy as np

def calculate_node_strength(model, G):
    """Return the L2 norm of each node's embedding vector.

    Args:
        model: Trained embedding model exposing a ``wv`` mapping from node
            key to vector.
        G: Graph whose ``nodes`` are looked up in ``model.wv``.

    Returns:
        A dict mapping every node of ``G`` to a float: the Euclidean norm of
        its embedding, or ``0.0`` when the node has no embedding.
    """
    vectors = model.wv
    node_strength = {}
    for node in G.nodes:
        # node2vec stores walk tokens as strings, so look nodes up by their
        # string form; for string node ids this is a no-op.
        key = str(node)
        if key in vectors:
            node_strength[node] = float(np.linalg.norm(vectors[key]))
        else:
            node_strength[node] = 0.0  # node never appeared in the embedding
    return node_strength

def calculate_pagerank(G):
    """Return the PageRank score of every node, using the ``weight`` edge attribute.

    Args:
        G: A networkx graph.

    Returns:
        The mapping produced by ``networkx.pagerank`` for ``G``.
    """
    scores = nx.pagerank(G, weight='weight')
    return scores

# Compute node strength (embedding norm) and weighted PageRank for the graph G.
node_strength = calculate_node_strength(model, G)
page_rank = calculate_pagerank(G)

步骤六:生成加权图示例

可视化代码

import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import graphviz_layout

def plot_weighted_graph(G, node_strength, page_rank, nodes_to_highlight=None):
    """Draw ``G`` as a weighted directed graph and show the figure.

    Node size scales with ``node_strength``, node colour follows
    ``page_rank`` (viridis colour map), arrows show edge direction and each
    edge is labelled with its ``weight``.

    Args:
        G: The networkx graph to draw.
        node_strength: Mapping from node to strength; must cover every node
            of ``G``.
        page_rank: Mapping from node to PageRank score; must cover every node
            of ``G``.
        nodes_to_highlight: Optional iterable of nodes to outline in red.
            When omitted or empty, nodes are drawn with the default outline.
    """
    try:
        # GraphViz "dot" layout lays the graph out along edge direction.
        pos = graphviz_layout(G, prog='dot')
    except ImportError:
        # nx_agraph needs pygraphviz; fall back to the pydot-based layout so
        # an environment with only pydot installed still works.
        from networkx.drawing.nx_pydot import graphviz_layout as pydot_layout
        pos = pydot_layout(G, prog='dot')

    # Edges without a weight attribute are labelled 1 instead of raising.
    edge_labels = {(u, v): f"{d.get('weight', 1)}" for u, v, d in G.edges(data=True)}

    # Fix the node order once so sizes, colours and outlines stay aligned.
    nodes = list(G.nodes)
    node_sizes = [node_strength[node] * 100 for node in nodes]
    node_colors = [page_rank[node] for node in nodes]

    highlight_options = {}
    if nodes_to_highlight:
        highlighted = set(nodes_to_highlight)
        highlight_options["edgecolors"] = [
            "red" if node in highlighted else "none" for node in nodes
        ]
        highlight_options["linewidths"] = 2.0

    nx.draw_networkx(
        G, pos,
        nodelist=nodes,
        node_size=node_sizes,
        node_color=node_colors,
        cmap=plt.cm.viridis,
        with_labels=True,
        arrows=True,  # show edge direction
        arrowstyle='->',
        arrowsize=10,
        **highlight_options,
    )
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
    plt.title("加权有向图(方向、权重、节点强度)")
    plt.show()

# Example: draw a key subgraph of the Ant project
subgraph = G.subgraph(['org.apache.tools.ant.Main', 'org.apache.tools.ant.Project'])
plot_weighted_graph(subgraph, node_strength, page_rank)

完整流程示例

# 1. Parse the project
python parse_java_project.py /path/to/Ant-1.6.1

# 2. Train Node2Vec
python -c """
import pickle
import networkx as nx
from node2vec import Node2Vec
# Assumes the graph was saved earlier; nx.read_gpickle was removed in
# NetworkX 3.0, so the pickle file is loaded with the pickle module directly.
with open('Ant-1.6.1.pkl', 'rb') as fh:
    G = pickle.load(fh)
node2vec = Node2Vec(G, dimensions=32, walk_length=30, num_walks=200, p=0.25, q=2)
model = node2vec.fit(window=10, min_count=1)
model.wv.save_word2vec_format('ant_embeddings.txt')
"""

# 3. Extract and merge the features
python -c """
import pandas as pd
# 加载各特征DataFrame后合并
final_features = pd.concat([dm_df, nm_df, wdnm_df], axis=1)
final_features.to_csv('ant_features.csv')
"""

# 4. Visualise
python plot_weighted_graph.py --input Ant-1.6.1.pkl --output ant_graph.png

注意事项

  1. 路径替换:需将 /path/to/Ant-1.6.1 替换为实际项目路径。
  2. 标签定义:需根据实际需求定义关键类标签(例如通过规则或人工标注)。
  3. 性能优化:大规模项目需增加内存分配或分块处理。
  4. GraphViz 安装:确保已安装 GraphViz 工具(sudo apt-get install graphviz);此外,networkx.drawing.nx_agraph 中的 graphviz_layout 还依赖 Python 包 pygraphviz(pip install pygraphviz)。