Copyright Notice: The materials presented below are for academic use only. Copyright and all rights therein are retained by the authors or by the respective copyright holders.
% ICNP 2025 conference paper. No page range or DOI is recorded yet; add
% `pages` (with an en-dash range, e.g. 1--10) and `doi` once they are known.
% NOTE(review): `date` is free text (not ISO 8601) and `corr` is a non-standard
% field; both are kept verbatim on the assumption that an external template
% reads them -- confirm before normalising.
@inproceedings{zhou2025lightning,
  author    = {Zhou, Jiasheng and Liu, Ying and He, Lin and Shi, Xiaoyi and Yang, Yifan and Wei, Chentian and Cheng, Daguo and Gong, Wenwen and Yang, Jiahai},
  title     = {Lightning in the Dark: Uncovering Global {IPv6} Router Interfaces and Their Security Implications},
  booktitle = {Proceedings of the 33rd IEEE International Conference on Network Protocols (ICNP 2025)},
  year      = {2025},
  address   = {Seoul, South Korea},
  date      = {September 22-25},
  corr      = {true},
}
ICNP
SubRecon: Efficient Internet wide IPv6 Subnet Discovery and Its Applications
Jiasheng Zhou, Ying Liu, Lin He, Yifan Yang, Xiaoyi Shi, Daguo Cheng, Chentian Wei, Yun Fan, and Guanglei Song
In Proceedings of the 33rd IEEE International Conference on Network Protocols (ICNP 2025) Seoul, South Korea, September 22-25, 2025
% ICNP 2025 conference paper. No page range or DOI is recorded yet; add
% `pages` and `doi` once they are known.
% Braced words in the title keep their capitalisation under sentence-casing styles.
@inproceedings{zhou2025subrecon,
  author    = {Zhou, Jiasheng and Liu, Ying and He, Lin and Yang, Yifan and Shi, Xiaoyi and Cheng, Daguo and Wei, Chentian and Fan, Yun and Song, Guanglei},
  title     = {{SubRecon}: Efficient {Internet} wide {IPv6} Subnet Discovery and Its Applications},
  booktitle = {Proceedings of the 33rd IEEE International Conference on Network Protocols (ICNP 2025)},
  year      = {2025},
  address   = {Seoul, South Korea},
  date      = {September 22-25},
  corr      = {true},
}
INFOCOM
6Map: Enabling Fast Active IPv6 Address Discovery with Programmable Switches
The vast address space of IPv6 makes it impractical to apply exhaustive scanning to survey the entire network. Existing studies that aim to quickly discover active IPv6 addresses by optimizing the scanning space face issues of poor scalability and high time complexity, which hinder a comprehensive understanding of the IPv6 network and impede the timely provision of security snapshots and the development of IPv6. Fortunately, the emergence of programmable switches provides an opportunity to address the above issues. To this end, we propose 6Map, a fast active IPv6 address discovery system based on programmable switches. We design a lightweight target generation algorithm running on the control plane of the programmable switch and implement fast scanning address generation on the switch ASIC. The experimental results show that IPv6 active address discovery efficiency is up to 40.1x compared to the state-of-the-art method at the 100M probe budget.
% INFOCOM 2025 conference paper.
% Page ranges use a double hyphen so styles typeset an en-dash; braced title
% words keep their capitalisation under sentence-casing styles.
@inproceedings{zhou20256map,
  author    = {Zhou, Jiasheng and He, Lin and Yang, Yifan and Shi, Xiaoyi and Cheng, Daguo and E, Jinlong and Liu, Ying and Zhang, Dong},
  title     = {{6Map}: Enabling Fast Active {IPv6} Address Discovery with Programmable Switches},
  booktitle = {Proceedings of the 44th IEEE Conference on Computer Communications (INFOCOM)},
  year      = {2025},
  pages     = {1--10},
  doi       = {10.1109/INFOCOM55648.2025.11044705},
  address   = {London, United Kingdom},
  date      = {May 19-22},
  corr      = {true},
}
ICNP
Gungnir: Autoregressive Model for Unified Generation of IPv6 Fully Responsive Prefixes
Chentian Wei, Ying Liu, Lin He, Daguo Cheng, and Jiasheng Zhou
In Proceedings of the 33rd IEEE International Conference on Network Protocols (ICNP 2025) Seoul, South Korea, September 22-25, 2025
% ICNP 2025 conference paper. No page range or DOI is recorded yet; add
% `pages` and `doi` once they are known.
% NOTE(review): the key starts with a capital letter, unlike the other keys in
% this file; it is kept unchanged because existing citations may depend on it.
@inproceedings{Wei2025gungnir,
  author    = {Wei, Chentian and Liu, Ying and He, Lin and Cheng, Daguo and Zhou, Jiasheng},
  title     = {{Gungnir}: Autoregressive Model for Unified Generation of {IPv6} Fully Responsive Prefixes},
  booktitle = {Proceedings of the 33rd IEEE International Conference on Network Protocols (ICNP 2025)},
  year      = {2025},
  address   = {Seoul, South Korea},
  date      = {September 22-25},
  corr      = {true},
}
USENIX ATC
TGW: Operating an Efficient and Resilient Cloud Gateway at Scale
Yifan Yang, Lin He, Jiasheng Zhou, Xiaoyi Shi, Yichi Xu, Shicheng Wang, Jinlong E, Ying Liu, Junwei Zhang, Zhuang Yuan, and Hengyang Xu
In Proceedings of the 2025 USENIX Annual Technical Conference (USENIX ATC 2025) Boston, MA, USA, July 7 - 9, 2025
Large-scale cloud data centers have become a critical Internet infrastructure. As the cloud entrance, today’s cloud gateways have integrated multiple functions such as elastic public access and load balancing to cope with the rapid growth of services and requirements. To meet the demands of large-scale clouds for efficient packet forwarding, scalable state management, and high resilience, we design, deploy, and operate Tencent Gateway (TGW), an efficient and resilient cloud gateway at scale. Compared to other large cloud providers that primarily offer services like search, e-commerce, or short-form video, the "killer services" of Tencent Cloud are online gaming and live streaming, which come with much stricter requirements for latency, jitter, and packet loss. From a technological perspective, TGW is highly decoupled and modular, with core components focused on efficient forwarding planes, a scalable state migration mechanism, a resilient failure recovery mechanism, and a failure detection and localization system. In terms of engineering, TGW has been operating in large-scale, real-world industrial environments for eight years, during which we have gained extensive insights and experience. We evaluate TGW both in testbed and real-world scenarios. In our testbed, TGW’s single node achieves 2.9x the forwarding capacity of prior systems. Between clusters, states and traffic can be migrated in 4 s without packet loss. In our real-world environment, TGW handles tens of Tbps of traffic, with a worst-case packet drop rate ranging from $10^{-7}$ to $10^{-4}$, while balancing traffic across clusters. Additionally, TGW can quickly migrate states and traffic and recover from failures without tenant awareness, guided by our failure localization system, achieving 100% availability for years.
% USENIX ATC 2025 conference paper.
% Page ranges use a double hyphen so styles typeset an en-dash; the braced
% acronym in the title survives sentence-casing styles.
@inproceedings{yang2025tgw,
  author    = {Yang, Yifan and He, Lin and Zhou, Jiasheng and Shi, Xiaoyi and Xu, Yichi and Wang, Shicheng and E, Jinlong and Liu, Ying and Zhang, Junwei and Yuan, Zhuang and Xu, Hengyang},
  title     = {{TGW}: Operating an Efficient and Resilient Cloud Gateway at Scale},
  booktitle = {Proceedings of the 2025 USENIX Annual Technical Conference (USENIX ATC 2025)},
  year      = {2025},
  pages     = {199--215},
  isbn      = {978-1-939133-48-9},
  url       = {https://www.usenix.org/conference/atc25/presentation/yang-yifan},
  publisher = {USENIX Association},
  address   = {Boston, MA, USA},
  date      = {July 7 - 9},
  corr      = {true},
}
WWW
Miresga: Accelerating Layer-7 Load Balancing with Programmable Switches
As online cloud services expand rapidly, layer-7 load balancing has become indispensable for maintaining service availability and performance. The emergence of programmable switches with both high performance and a certain degree of flexibility has made it possible to apply programmable switches to load balancing. Nevertheless, the meager memory capacity and the relatively sluggish speed of table entry insertion and deletion of programmable switches have severely constrained their performance. To this end, we introduce Miresga, a hybrid and high-performance layer-7 load balancing system by co-designing hardware and software. The core idea of Miresga is to maximize the utilization of hardware and software resources by rationally partitioning the layer-7 load balancing task, thereby improving performance. To achieve this, Miresga offloads the elephant flows, which account for the majority of traffic, to programmable switches that excel at packet processing, and Miresga utilizes general-purpose servers with stronger computational capabilities to parse application layer protocols and apply load balancing rules. To alleviate memory pressure on the programmable switch, Miresga employs a back-end agent to handle memory-intensive tasks, working in conjunction with the programmable switch to complete the offloaded tasks. This design leverages the performance advantages of the programmable switch while avoiding bottlenecks caused by its limited memory and table insertion speed. We implement the Miresga prototype with a 3.2 Tbps Intel Tofino switch and general-purpose servers. The evaluation results show that Miresga achieves 3.9x throughput and 0.4x latency compared to software load balancing solutions. Compared to state-of-the-art design employing programmable switches, Miresga achieves almost the same throughput and latency for delivering large objects and 5.0x throughput and 0.2x latency when transmitting small objects.
% WWW 2025 conference paper.
% Page ranges use a double hyphen so styles typeset an en-dash; the braced
% system name in the title survives sentence-casing styles.
@inproceedings{shi2025miresga,
  author    = {Shi, Xiaoyi and He, Lin and Zhou, Jiasheng and Yang, Yifan and Liu, Ying},
  title     = {{Miresga}: Accelerating Layer-7 Load Balancing with Programmable Switches},
  booktitle = {Proceedings of the 34th ACM Web Conference (WWW 2025)},
  year      = {2025},
  pages     = {2424--2434},
  doi       = {10.1145/3696410.3714809},
  address   = {Sydney, Australia},
  date      = {April 28 - May 2},
  corr      = {true},
}
2024
SIGCOMM
P4runpro: Enabling Runtime Programmability for RMT Programmable Switches
Programmable switches have revolutionized network operations by enabling the flexible customization of packet processing logic using languages like P4. However, changing the programs running on the switch requires disturbing traffic and suspending other unrelated programs. In this paper, we present P4runpro, enabling runtime data plane updates with dynamic resource allocation. The P4runpro data plane abstracts hardware resources and defines dynamically reconfigurable atomic operations that form packet processing logic. P4runpro provides runtime programming interfaces called P4runpro primitives for the operator to write high-level programs. We have designed the P4runpro compiler to automatically and consistently link the P4runpro programs to the running data plane. We implement our prototype on a Tofino switch. We implement 15 example runtime programs using P4runpro to demonstrate its generality and expressiveness. Our evaluation results show that compared to the state-of-the-art, P4runpro can respond within hundreds of milliseconds, achieve an average of 60% to 80% dynamic resource utilization, concurrently run ≈0.6K to ≈2.8K programs, and introduce lower overhead. Our case studies illustrate the benefit of runtime programming and prove the same functionality between P4runpro and conventional P4 programs.
% SIGCOMM 2024 conference paper.
% Page ranges use a double hyphen so styles typeset an en-dash; braced title
% words keep their capitalisation under sentence-casing styles.
@inproceedings{yang2024p4runpro,
  author    = {Yang, Yifan and He, Lin and Zhou, Jiasheng and Shi, Xiaoyi and Cao, Jiamin and Liu, Ying},
  title     = {{P4runpro}: Enabling Runtime Programmability for {RMT} Programmable Switches},
  booktitle = {Proceedings of the 2024 Annual Conference of the ACM Special Interest Group on Data Communication (SIGCOMM)},
  year      = {2024},
  pages     = {921--937},
  doi       = {10.1145/3651890.3672230},
  address   = {Sydney, Australia},
  date      = {August 4-8},
  corr      = {true},
}
ICC
N4: Network for N Neural Network Training
Jiasheng Zhou, Shengrui Lin, Hongyan Liu, Xinyang Chen, Pengpai Shi, Longlong Zhu, and Dong Zhang
In ICC 2024-IEEE International Conference on Communications Denver, CO, USA, June 9-13, 2024
As the amount of data and complexity of neural network models continue to grow, distributed training has become increasingly crucial for improving training speed. However, the bottleneck of distributed training is the communication overheads among distributed workers. Recent research has shown that performing in-network aggregation using programmable switches is a good way to accelerate distributed training. However, previous work has only targeted specific neural network models and can only be applied in specified network topologies. Administrators may train different models and train them in different network topologies. In order to generalize the approach of using programmable switches to accelerate distributed training, we propose N4, a programmable intra-switch acceleration framework that supports distributed training of multiple neural networks. N4 also realizes the deployment of distributed workers based on any topology. Our experimental results show that N4 ensures high performance and isolation when training numerous neural networks. N4 outperforms state-of-the-art systems, accelerating training for existing methods by up to 3.4x.
% ICC 2024 conference paper.
% `N4` and the standalone `N` are braced so sentence-casing styles do not
% lower-case them.
@inproceedings{zhou2024n4,
  author    = {Zhou, Jiasheng and Lin, Shengrui and Liu, Hongyan and Chen, Xinyang and Shi, Pengpai and Zhu, Longlong and Zhang, Dong},
  title     = {{N4}: Network for {N} Neural Network Training},
  booktitle = {ICC 2024-IEEE International Conference on Communications},
  year      = {2024},
  pages     = {453--458},
  doi       = {10.1109/ICC51166.2024.10622794},
  address   = {Denver, CO, USA},
  date      = {June 9-13},
  corr      = {true},
}
Journal & Magazine papers
2024
Computer Science
IntervalSketch: Approximate Statistical Method for Interval Items in Data Stream
The proportion of streaming databases is gradually increasing, and extracting the required information in the data streams of streaming databases is an important task. In this paper, we study interval items, which refer to pairs of elements arriving with a fixed interval, and apply them to network scenarios. It is the first work to define and count interval items in data streams. To efficiently count the top-K interval items, IntervalSketch is proposed. IntervalSketch firstly chunks the data stream based on simulated annealing to accelerate the statistical speed, secondly uses Sketch to store the interval items, and lastly reduces the memory of storing the interval items in Sketch through the feature grouping storage strategy, which enhances the accuracy of counting the interval items. Extensive comparative experiments are carried out on two real datasets. Experimental results show that IntervalSketch significantly outperforms the baseline solution with the same memory: the processing time is 1/3 to 1/2 of that of the baseline solution, and the average absolute error and the average relative error are 1/3 of those of the baseline solution.
% Journal article in "Computer Science", volume 51, issue 04 (2024).
% Page ranges use a double hyphen so styles typeset an en-dash; the braced
% camel-case name in the title survives sentence-casing styles.
@article{chen2024IntervalSketch,
  author  = {Chen, Xinyang and Chen, Hanze and Zhou, Jiasheng and Huang, Jiaqing and Yu, Jiasuo and Zhu, Longlong and Zhang, Dong},
  title   = {{IntervalSketch}: Approximate Statistical Method for Interval Items in Data Stream},
  journal = {Computer Science},
  year    = {2024},
  volume  = {51},
  number  = {04},
  pages   = {4--10},
}
Patents
2024
CN Patent
Address Detection Methods, Apparatus, Storage Media and Electronic Devices